fused multiply adds added

master
kaqu 2 years ago
parent b58d7737fa
commit f32aa07698
  1. 38
      libmodules/fpu_decode.py
  2. 22
      libmodules/instruction_decode.py
  3. 4
      software/source/flwstw.c

@ -31,6 +31,10 @@ class Risq5FPUDecoder(Module):
self.fmul = Signal()
self.fdiv = Signal()
self.fsqrt = Signal()
self.fmadd = Signal()
self.fmsub = Signal()
self.fnmadd = Signal()
self.fnmsub = Signal()
self.fready = Signal() # Indicate ready
self.fwrite = Signal() # F-Extension: Do a write to a float register
@ -66,17 +70,17 @@ class Risq5FPUDecoder(Module):
NextValue(self.m1, Cat(0,0,0, regs.fs1[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
NextValue(self.m2, Cat(0,0,0, regs.fs2[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
NextState("FADD1")
).Elif((self.fmul | self.fdiv) & ~self.fready, # Triggers set & ready flag reset externally!
).Elif((self.fmadd | self.fmsub | self.fnmadd | self.fnmsub | self.fmul | self.fdiv) & ~self.fready, # Triggers set & ready flag reset externally!
NextValue(self.sign1, regs.fs1[31]),
NextValue(self.sign2, regs.fs2[31]),
NextValue(self.e1, regs.fs1[23:31] - 127),
NextValue(self.e2, regs.fs2[23:31] - 127),
NextValue(self.m1, Cat(regs.fs1[0:23], 1, 0)), # | 0x00800000
NextValue(self.m2, Cat(regs.fs2[0:23], 1, 0)), # | 0x00800000
If(self.fmul, # Multiplication
If(self.fdiv, # Division
NextState("FDIV1"),
).Else( # Multiplication variants
NextState("FMUL1"),
).Else( # Division
NextState("FDIV1")
)
).Elif(self.fsqrt & ~self.fready, # Trigger set & ready flag reset externally!
NextValue(self.sign1, regs.fs1[31]),
@ -111,17 +115,23 @@ class Risq5FPUDecoder(Module):
).Elif(regs.fs1[0:31] == 0, # Nothing to add? (w/o sign!)
If(self.fsub, # Subtract yields negative result!
NextValue(regs.frd_wrport.dat_w, regs.fs2 ^ 0x80000000), # Invert sign
).Elif(self.fmsub | self.fnmsub, # 0*x=>0! 0-fs3 = -fs3!
NextValue(regs.frd_wrport.dat_w, regs.fs3 ^ 0x80000000), # Invert sign
).Elif(self.fmadd | self.fnmadd, # 0*x=>0! 0+fs3 = fs3!
NextValue(regs.frd_wrport.dat_w, regs.fs3), # Ready!
).Else( # Straight add
NextValue(regs.frd_wrport.dat_w, regs.fs2), # Ready!
),
NextValue(self.fwrite, 1),
NextValue(self.fready, 1),
NextState("FPU_IDLE")
).Elif(regs.fs2[0:31] == 0, # Nothing to add? (w/o sign!)
).Elif((self.fadd | self.fsub) & (regs.fs2[0:31] == 0), # Nothing to add? (w/o sign!)
NextValue(regs.frd_wrport.dat_w, regs.fs1), # Ready!
NextValue(self.fwrite, 1),
NextValue(self.fready, 1),
NextState("FPU_IDLE")
NextState("FPU_IDLE")
).Elif((self.fmadd | self.fmsub | self.fnmadd | self.fnmsub) & ((self.e2 == 0) & (self.m2 == 0)), # Nothing to add (w/o sign!)
NextState("FRESULT") # Just supply (normalized finally!) result from multiplication!
).Else( # Ok, valid floats supplied ...
NextValue(self.s_bit, 0),
NextValue(self.branch1, 0), # Reset helpers
@ -319,8 +329,22 @@ class Risq5FPUDecoder(Module):
# 6. Construction of result
NextValue(self.m3, (self.lm3 >> 23) & 0x7FFFFF),
# TODO: e3=se3 omitted ok?
NextState("FRESULT")
If(self.fmul, # Simple multiplication
NextState("FRESULT")
).Else( # Fused multiply-add?
NextValue(self.sign3, self.sign3 ^ (self.fnmadd | self.fnmsub)), # Negate mult. result w/ f<n>xxx
NextState("FMADD1")
)
) # End of fmul.s processing
FPU_fsm.act("FMADD1",
NextValue(self.sign1, self.sign3), # Product sign (already negated for f<n>xxx in previous state) becomes operand 1 sign
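NextValue(self.sign2, regs.fs3[31] ^ (self.fmsub | self.fnmsub)), # Invert sign for subtraction! NOTE(review): RISC-V spec subtracts rs3 for fmsub/fnmadd (fnmsub = -(rs1*rs2)+rs3) - verify selection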
NextValue(self.e1, self.e3),
NextValue(self.e2, regs.fs3[23:31] - 127),
NextValue(self.m1, Cat(0,0,0, self.m3[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
NextValue(self.m2, Cat(0,0,0, regs.fs3[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
NextState("FADD1")
)
FPU_fsm.act("FDIV1",
NextValue(self.FPU_state, 1),

@ -319,14 +319,14 @@ class Risq5Decoder(Module):
NextValue(SU_Unit.bData, regs.fs2), # Pick actual value to store (from fs2) & load SU
NextValue(self.SUByteID, 7), # Type: Word (6->7!)
NextValue(self.SUStore, 1), # Enforce store unit engagement (for now: always!)
).Elif(regs.op == 0x43, # R4-Type: fmadd.s
NextValue(self.DECODE_state, 0x0F), # Dummy action
).Elif(regs.op == 0x47, # R4-Type: fmsub.s
NextValue(self.DECODE_state, 0x0F), # Dummy action
).Elif(regs.op == 0x4B, # R4-Type: fnmsub.s
NextValue(self.DECODE_state, 0x0F), # Dummy action
).Elif(regs.op == 0x4F, # R4-Type: fnmadd.s
NextValue(self.DECODE_state, 0x0F), # Dummy action
).Elif(regs.op == 0x43, # R4-Type: fmadd.s frd, frs1, frs2, frs3
NextValue(fpu_decoder.fmadd, 1), # Post fmadd.s indication
).Elif(regs.op == 0x47, # R4-Type: fmsub.s frd, frs1, frs2, frs3
NextValue(fpu_decoder.fmsub, 1), # Post fmsub.s indication
).Elif(regs.op == 0x4B, # R4-Type: fnmsub.s frd, frs1, frs2, frs3
NextValue(fpu_decoder.fnmsub, 1), # Post fnmsub.s indication
).Elif(regs.op == 0x4F, # R4-Type: fnmadd.s frd, frs1, frs2, frs3
NextValue(fpu_decoder.fnmadd, 1), # Post fnmadd.s indication
).Elif(regs.op == 0x53, # R-Type
If(regs.f7 == 0x00, # fadd.s frd, frs1, frs2
NextValue(fpu_decoder.fadd, 1), # Post fadd.s indication
@ -405,7 +405,7 @@ class Risq5Decoder(Module):
NextValue(self.next, 1), # Indicate ready state to ALU
NextState("DECODE_IDLE") # No write!
).Else(
If(fpu_decoder.fadd | fpu_decoder.fsub | fpu_decoder.fmul | fpu_decoder.fdiv | fpu_decoder.fsqrt, # FPU logic engaging?
If(fpu_decoder.fmadd | fpu_decoder.fmsub | fpu_decoder.fnmadd | fpu_decoder.fnmsub | fpu_decoder.fadd | fpu_decoder.fsub | fpu_decoder.fmul | fpu_decoder.fdiv | fpu_decoder.fsqrt, # FPU logic engaging?
NextState("FPU_WAIT") # Reset request within FPU_WAIT!
).Elif(self.div_instruction != 0, # M extension divide instruction?
If(regs.xs2s == 0x0, # RISC-V: Doesn't raise exception div/0!
@ -630,6 +630,10 @@ class Risq5Decoder(Module):
NextValue(fpu_decoder.fmul, 0),
NextValue(fpu_decoder.fdiv, 0),
NextValue(fpu_decoder.fsqrt, 0),
NextValue(fpu_decoder.fmadd, 0),
NextValue(fpu_decoder.fmsub, 0),
NextValue(fpu_decoder.fnmsub,0),
NextValue(fpu_decoder.fnmadd, 0),
NextState("DECODE_WRITE")
)
)

@ -10,6 +10,10 @@ repeat: fadd.s f2,f0,f1 # f2 = f0 + f1 \n\
fmul.s f2,f0,f1 # f2 = f0 * f1 \n\
fdiv.s f2,f0,f1 # f2 = f0 / f1 \n\
fsqrt.s f2,f0 # f2 = sqrt(f0) \n\
fmadd.s f3,f0,f1,f2 # f3 = f0*f1+f2 \n\
fmsub.s f3,f0,f1,f2 # f3 = f0*f1-f2 \n\
fnmadd.s f3,f0,f1,f2 # f3 = -(f0*f1)-f2 \n\
fnmsub.s f3,f0,f1,f2 # f3 = -(f0*f1)+f2 \n\
j repeat # Loop ... \n\
lw x3,0(sp) # Load float from stack (test) \n\
lw x3,4(sp) # Load float from stack (test) \n\

Loading…
Cancel
Save