fsqrt ready

master
kaqu 2 years ago
parent e943825990
commit b58d7737fa
  1. 2
      debugger/dbgeval.py
  2. 61
      libmodules/fpu_decode.py
  3. 5
      libmodules/instruction_decode.py
  4. 1
      software/source/flwstw.c

@ -234,6 +234,8 @@ def disassemble(opcode, pc):
msg = "fmul.s {0},{1},{2}".format(f_description[rd], f_description[rs1], f_description[rs2])
elif f7 == 0x0C: # fdiv
msg = "fdiv.s {0},{1},{2}".format(f_description[rd], f_description[rs1], f_description[rs2])
elif f7 == 0x2C: # fsqrt
msg = "fsqrt.s {0},{1}".format(f_description[rd], f_description[rs1])
elif f7 == 0x10: # sign injection/inverted/xor'ed
if f3 == 0x00:
msg = "fsgnj.s {0},{1},{2}".format(f_description[rd], f_description[rs1], f_description[rs2])

@ -30,6 +30,7 @@ class Risq5FPUDecoder(Module):
self.fsub = Signal()
self.fmul = Signal()
self.fdiv = Signal()
self.fsqrt = Signal()
self.fready = Signal() # Indicate ready
self.fwrite = Signal() # F-Extension: Do a write to a float register
@ -44,6 +45,8 @@ class Risq5FPUDecoder(Module):
self.m2 = Signal((24+1+3,False), reset_less=True) # 23 bits + 1bit (1.xx = 0x800000)
self.m3 = Signal((25+1+3,True), reset_less=True) # + Sign + R(0)/Guard & Sticky bits
self.lm3 = Signal((64,True), reset_less=True) # MUL long result
self.s32 = Signal((32,True), reset_less=True) # Signed 32-bit
self.s_bit = Signal() # Sticky bit (for rounding control)
self.branch1 = Signal() # Branch helpers
self.branch2 = Signal()
@ -75,6 +78,11 @@ class Risq5FPUDecoder(Module):
).Else( # Division
NextState("FDIV1")
)
).Elif(self.fsqrt & ~self.fready, # Trigger set & ready flag reset externally!
NextValue(self.sign1, regs.fs1[31]),
NextValue(self.e1, regs.fs1[23:31] - 127),
NextValue(self.m1, Cat(regs.fs1[0:23], 1, 0)), # | 0x00800000
NextState("FSQRT1"),
)
)
@ -376,6 +384,59 @@ class Risq5FPUDecoder(Module):
)
) # End of fdiv.s processing
FPU_fsm.act("FSQRT1",
    # fsqrt.s, step 1: screen out special operands; otherwise choose the
    # approximation variant and latch the raw operand bits for FSQRT2.
    NextValue(self.FPU_state, 1),
    # 1. Verify valid ranges 1st!
    # Invalid operand: low 31 bits all ones, or sign bit set (sign1 is
    # fs1[31], latched when the operation was started) -> flag NV, return NaN.
    # NOTE(review): only this single NaN encoding is recognised, and every
    # operand with the sign bit set (which would include -0.0) ends up
    # here as well -- confirm that this is intended.
    If((regs.fs1[0:31] == 0x7FFFFFFF) | self.sign1,
        NextValue(regs.fcs, regs.fcs | 0x10), # NV: Invalid operation
        NextValue(regs.frd_wrport.dat_w, 0x7FFFFFFF), # NAN
        NextValue(self.fwrite, 1),
        NextValue(self.fready, 1),
        NextState("FPU_IDLE")
    # Infinity is encoded as exponent field 0xFF with a zero fraction. The
    # test is done on the raw operand bits: e1 was loaded with
    # (exponent field - 127) when the operation was started, so the former
    # comparison of e1 against -1 matched e.g. exponent field 126
    # (0.5 <= x < 1.0) and never an all-ones exponent.
    ).Elif((regs.fs1[23:31] == 0xFF) & (regs.fs1[0:23] == 0), # Infinity
        NextValue(regs.fcs, regs.fcs | 0x04), # OF: Overflow (flag behaviour kept as before)
        NextValue(regs.frd_wrport.dat_w, regs.fs1), # Return the operand unchanged
        NextValue(self.fwrite, 1),
        NextValue(self.fready, 1),
        NextState("FPU_IDLE")
    ).Else( # Better fast, than accurate! Use Newton-Raphson in S/W for better accuracy!
        # Approximation only: branch1 selects whether FSQRT2 applies the
        # 0x0004B0D2 correction constant (quoted as <= 3.5% error) or not
        # (exact for powers of two, otherwise quoted as up to ~6% error).
        # The correction is used when the fraction bits of m1 are non-zero
        # or e1 equals 1.
        If((self.m1[0:23] != 0) | (self.e1 == 1),
            NextValue(self.branch1, 1), # Use 0x0004B0D2 for minimized error (<= 3.5%)
        ).Else(
            NextValue(self.branch1, 0), # Use 0x00000000, only for 2^x exact, others up to ~6% error
        ),
        NextValue(self.s32, regs.fs1), # Pick up float value for manipulation
        NextState("FSQRT2")
    )
)
# fsqrt.s, step 2: approximate the square root with integer arithmetic on the
# raw float bit pattern that FSQRT1 copied into s32:
#   1. subtract 1 << 23 (0x00800000),
#   2. shift right by one (halve),
#   3. add 1 << 29 (0x20000000), i.e. ((b + 1) / 2) * 2^m with b = 127, m = 23.
# With branch1 set, 0x0004B0D2 is additionally subtracted from the added
# constant as an error-minimizing correction.
FPU_fsm.act("FSQRT2",
NextValue(self.FPU_state, 2),
# 1 << 23 /* Subtract 2^m. (0x00800000) */
# >> 1; /* Divide by 2. */
# 1 << 29 /* Add ((b + 1) / 2) * 2^m. (0x20000000) */
If(self.branch1,
NextValue(self.s32, ((self.s32 - 0x00800000) >> 1) + (0x20000000 - 0x0004B0D2)), # Error minimizer term!
).Else(
NextValue(self.s32, ((self.s32 - 0x00800000) >> 1) + 0x20000000),
),
NextState("FSQRT3")
)
FPU_fsm.act("FSQRT3",
    # fsqrt.s, final step: hand the approximated bit pattern in s32 to the
    # float register write port, apply the underflow check, signal
    # completion and return to FPU_IDLE.
    NextValue(self.FPU_state, 3),
    NextValue(regs.frd_wrport.dat_w, self.s32), # Just map value straight ...
    # Underflow check: result is non-zero and its exponent field equals 1.
    # The exponent comparison needs its own parentheses: in Python `&` binds
    # tighter than `==`, so without them the expression was evaluated as
    # `((s32[0:31] != 0) & s32[23:31]) == 1`, i.e. it tested bit 0 of the
    # exponent field instead of "exponent == 1".
    If((self.s32[0:31] != 0) & (self.s32[23:31] == 1), # FLT_MIN range & not zero (already!)
        NextValue(regs.fcs, regs.fcs | 0x02), # UF: Underflow
        # This later assignment to dat_w replaces the straight mapping above
        # when bits 5..7 of fcs equal 1.
        If(regs.fcs[5:8] == 0x01, # RTZ rounding
            NextValue(regs.frd_wrport.dat_w, 0), # FLT_MIN -> Round to zero -> 0.0!
        )
    ),
    NextValue(self.fwrite, 1), # Write required
    NextValue(self.fready, 1), # Indicate ready to main decoder
    NextState("FPU_IDLE")
) # End of fsqrt.s processing
if __name__ == "__main__":
print("***** Register file is passive ... ;) *****")

@ -337,7 +337,7 @@ class Risq5Decoder(Module):
).Elif(regs.f7 == 0x0C, # fdiv.s
NextValue(fpu_decoder.fdiv, 1), # Post fdiv.s indication
).Elif(regs.f7 == 0x2C, # fsqrt.s
NextValue(self.DECODE_state, 0x0F), # Dummy action
NextValue(fpu_decoder.fsqrt, 1), # Post fsqrt.s indication
).Elif(regs.f7 == 0x10, # sign injection/inverted/xor'ed ALL OK!
If(regs.f3 == 0x00, # fsgnj.s rd, rs1, rs2 (copy rs2 sign to rs1 -> rd)
NextValue(regs.frd_wrport.dat_w, Cat(regs.fs1[0:31], regs.fs2[31])),
@ -405,7 +405,7 @@ class Risq5Decoder(Module):
NextValue(self.next, 1), # Indicate ready state to ALU
NextState("DECODE_IDLE") # No write!
).Else(
If(fpu_decoder.fadd | fpu_decoder.fsub | fpu_decoder.fmul | fpu_decoder.fdiv, # FPU logic engaging?
If(fpu_decoder.fadd | fpu_decoder.fsub | fpu_decoder.fmul | fpu_decoder.fdiv | fpu_decoder.fsqrt, # FPU logic engaging?
NextState("FPU_WAIT") # Reset request within FPU_WAIT!
).Elif(self.div_instruction != 0, # M extension divide instruction?
If(regs.xs2s == 0x0, # RISC-V: Doesn't raise exception div/0!
@ -629,6 +629,7 @@ class Risq5Decoder(Module):
NextValue(fpu_decoder.fsub, 0),
NextValue(fpu_decoder.fmul, 0),
NextValue(fpu_decoder.fdiv, 0),
NextValue(fpu_decoder.fsqrt, 0),
NextState("DECODE_WRITE")
)
)

@ -9,6 +9,7 @@ repeat: fadd.s f2,f0,f1 # f2 = f0 + f1 \n\
fsub.s f2,f0,f1 # f2 = f0 - f1 \n\
fmul.s f2,f0,f1 # f2 = f0 * f1 \n\
fdiv.s f2,f0,f1 # f2 = f0 / f1 \n\
fsqrt.s f2,f0 # f2 = sqrt(f0) \n\
j repeat # Loop ... \n\
lw x3,0(sp) # Load float from stack (test) \n\
lw x3,4(sp) # Load float from stack (test) \n\

Loading…
Cancel
Save