fcvt.w.s bug in 0..1 range

master
kaqu 2 years ago
parent ff357d54bf
commit 967388bd40
  1. 5
      debugger/dbgeval.py
  2. 8
      libmodules/core.py
  3. 81
      libmodules/fpu_decode.py
  4. 29
      libmodules/instruction_decode.py
  5. 2
      software/source/flwstw.c

@ -257,6 +257,11 @@ def disassemble(opcode, pc):
msg = "fmin.s {0},{1},{2}".format(f_description[rd], f_description[rs1], f_description[rs2])
elif f3 == 0x01:
msg = "fmax.s {0},{1},{2}".format(f_description[rd], f_description[rs1], f_description[rs2])
elif f7 == 0x60: # integer conversion
if rs2 == 0x00:
msg = "fcnv.w.s {0},{1}".format(x_description[rd], f_description[rs1])
else:
msg = "fcnv.wu.s {0},{1}".format(x_description[rd], f_description[rs1])
elif f7 == 0x70:
if f3 == 0x00:
msg = "fmv.x.s {0},{1}".format(x_description[rd], f_description[rs1])

@ -209,8 +209,11 @@ class Risq5Core(Module, AutoCSR, AutoDoc, ModuleDoc):
self.LU_CacheValid = Signal() # Indicate loaded LU cache
self.sync += self.b32status.storage[5].eq(self.LU_CacheValid) # Indicate cache status to external world
# Shared post for base & all extensions ...
self.writepost = Signal()
# Integrate F-Extension decode
self.submodules.fpu_decoder = fpu_decoder = Risq5FPUDecoder(regs=regs)
self.submodules.fpu_decoder = fpu_decoder = Risq5FPUDecoder(regs=regs, writepost=self.writepost)
# Integrate an instruction decoder
self.submodules.decoder = decoder = Risq5Decoder(
@ -224,7 +227,8 @@ class Risq5Core(Module, AutoCSR, AutoDoc, ModuleDoc):
LUCache=LUCache,
SU_Unit=SU_Unit,
isa_extensions=isa_extensions,
fpu_decoder=fpu_decoder
fpu_decoder=fpu_decoder,
writepost=self.writepost
)
#---------------- L1 cache -------------------------------------------------------------

@ -22,8 +22,9 @@ class Risq5FPUDecoder(Module):
"""
Risq5 FPU instruction decoder
"""
def __init__(self, regs=None):
def __init__(self, regs=None, writepost=None):
assert isinstance(regs, Risq5RegisterFile)
assert isinstance(writepost, Signal)
# F-Extension: Job triggers
self.fadd = Signal()
@ -92,10 +93,11 @@ class Risq5FPUDecoder(Module):
).Elif((self.fsqrt | self.fcvt_w_s) & ~self.fready, # Trigger set & ready flag reset externally!
NextValue(self.sign1, regs.fs1[31]),
NextValue(self.e1, regs.fs1[23:31] - 127),
NextValue(self.m1, Cat(regs.fs1[0:23], 1, 0)), # | 0x00800000
If(self.fsqrt,
NextValue(self.m1, Cat(regs.fs1[0:23], 1, 0)), # | 0x00800000
NextState("FSQRT1"),
).Else( # Conversion to integer ...
NextValue(self.m1, Cat(regs.fs1[0:23], 0, 0)), # W/o 0x00800000 !
NextState("FCVT_W_S1")
)
)
@ -555,9 +557,80 @@ class Risq5FPUDecoder(Module):
) # End of fmax.s processing
FPU_fsm.act("FCVT_W_S1",
NextState("FCVT_W_S2")
If((self.e1 < 0) & self.fcvt_w_s, # Nothing to do ...
NextValue(regs.rd_wrport.dat_w, 0), # Just return zero!
NextValue(writepost, 1), # Write required (but integer register)
NextValue(self.fready, 1), # Indicate ready to main decoder
NextState("FPU_IDLE")
).Else( # Relevant portion existing ...
NextValue(self.s32, 1), # Starter/initial value req.
NextState("FCVT_W_S2")
)
)
# FCVT_W_S2: integer-part extraction for fcvt.w.s.
# Each visit with e1 > 0 moves one bit from the top of the mantissa (m1[22])
# into the LSB of the integer accumulator s32 and decrements the exponent.
# Once e1 is no longer positive, m1 is masked to its low 23 bits (the
# remaining fraction) and the FSM proceeds to the rounding state FCVT_W_S3.
# NOTE(review): there is no bound on e1 here, so exponents larger than the
# width of s32 presumably overflow the accumulator - confirm how NaN/inf and
# out-of-range inputs are meant to be handled.
FPU_fsm.act("FCVT_W_S2",
# 1. De-Normalization: Positive exponent dictates pre-decimal point value 1nnnn.xxx (from 1.xxx)
If(self.e1 > 0,
If(self.m1[22], # & 0x400000
NextValue(self.s32, (self.s32 << 1) | 1)
).Else(
NextValue(self.s32, self.s32 << 1),
),
NextValue(self.e1, self.e1 - 1),
NextValue(self.m1, self.m1 << 1)
).Else(
NextValue(self.m1, self.m1 & 0x7FFFFF), # Cut to normal (after shifts, s.a.)
NextState("FCVT_W_S3")
)
)
# FCVT_W_S3: rounding step of fcvt.w.s.
# At this point s32 holds the integer magnitude and m1 the remaining fraction
# (bit 22 set == 0.5, i.e. 0x400000). The sign is kept separately in sign1
# and is applied afterwards in FCVT_W_S4, so "+ 1" below always means
# "increase the magnitude by one".
# The rounding mode is taken from regs.fcs[5:8]
# (presumably the frm field of fcsr - confirm against the register file).
FPU_fsm.act("FCVT_W_S3",
    If(regs.fcs[5:8] == 0x0,                        # rne/Round to nearest, ties to even
        If(self.m1 == 0x400000,                     # Exactly 0.5?
            If(self.s32[0],                         # Odd integer part?
                NextValue(self.s32, self.s32 + 1)   # Tie to even!
            )
        ).Else(
            If(self.m1 > 0x400000,                  # > 0.5
                NextValue(self.s32, self.s32 + 1)   # Round magnitude upward
            )
            # Else (< 0.5): Round downward -> trunc (do nothing)
        )
    ).Elif(regs.fcs[5:8] == 0x2,                    # rdn/Round down towards -inf
        If(self.m1 > 0,                             # There is a rest ...
            If(self.sign1,                          # Negative: next integer further from zero
                NextValue(self.s32, self.s32 + 1)   # Increase magnitude
            )
            # Positive to -inf: Just cut-off!
        )
    ).Elif(regs.fcs[5:8] == 0x3,                    # rup/Round up towards +inf
        If(self.m1 > 0,                             # There is a rest ...
            # Fix: the increment must happen for POSITIVE values only; the
            # previous condition tested sign1 (negative) and thereby
            # duplicated the rdn behaviour.
            If(~self.sign1,                         # Positive: Add to next integer
                NextValue(self.s32, self.s32 + 1)   # Increase magnitude
            )
            # Negative to +inf: Just cut-off!
        )
    ).Elif(regs.fcs[5:8] == 0x4,                    # rmm/Round to nearest, ties to max magnitude
        If(self.m1 == 0x400000,                     # Exactly 0.5?
            NextValue(self.s32, self.s32 + 1)       # Tie away from zero (sign independent)
        ).Else(
            If(self.m1 > 0x400000,                  # > 0.5
                NextValue(self.s32, self.s32 + 1)   # Round magnitude upward
            )
            # Else (< 0.5): Round downward -> trunc (do nothing)
        )
    ),
    # Else: 0x1=rtz/Round to zero => Trunc (do nothing)
    NextState("FCVT_W_S4")
)
# FCVT_W_S4: final step of fcvt.w.s.
# Applies the sign to the rounded magnitude in s32, places the result on the
# integer register write port, requests the rd write via writepost and
# signals completion (fready) before returning to FPU_IDLE.
FPU_fsm.act("FCVT_W_S4",
    If(self.sign1,
        # Fix: integer registers hold two's complement values, so a negative
        # result is the arithmetic negation of the magnitude. Merely forcing
        # bit 31 to 1 produced sign-magnitude (e.g. -1 -> 0x80000001).
        # The negation is truncated to the width of dat_w on assignment.
        NextValue(regs.rd_wrport.dat_w, -self.s32),
    ).Else(
        NextValue(regs.rd_wrport.dat_w, self.s32),  # Just map value straight ...
    ),
    NextValue(writepost, 1),                        # Write to rd/integer register required!
    NextValue(self.fready, 1),                      # Indicate ready to main decoder
    NextState("FPU_IDLE")
)
# End of fcvt.w.s processing

@ -25,10 +25,11 @@ class Risq5Decoder(Module):
"""
def __init__(self, regs=None, modereg=None, statusreg=None, L1CacheOffset=None, L1CacheSize=32,
LU_CacheWait=24, LU_CacheValid=None, LUCache=None, SU_Unit=None,
isa_extensions=0x40000100, fpu_decoder=None
isa_extensions=0x40000100, fpu_decoder=None, writepost=None
):
assert isinstance(regs, Risq5RegisterFile)
assert isinstance(fpu_decoder, Risq5FPUDecoder)
assert isinstance(writepost, Signal)
self.start = Signal() # Start decoding signal
self.next = Signal() # Signal readyness ...
@ -37,7 +38,7 @@ class Risq5Decoder(Module):
self.LUByteID = Signal(3, reset_less=True) # Uneven adressing remainder (0..3=byte, 4..5=halfword, 6=word)
self.SUStore = Signal() # Signal SU to store a value
self.SUByteID = Signal(3, reset_less=True) # Uneven adressing remainder (0..3=byte, 4..5=halfword, 6=not used/direct write)
self.write = Signal() # Do a write (local register)
#self.write = Signal() # Do a write (local register) -> moved to writepost
self.L1Below = Signal(reset_less=True)
self.L1Beyond = Signal(reset_less=True) # Local cache boundaries (only valid w/ branches!)
@ -90,7 +91,7 @@ class Risq5Decoder(Module):
If(self.start, # Do the job when triggered
#NextValue(self.next, 0), # Indicate not yet there ... -> Reset @ top level!
NextValue(self.start, 0), # Once!
NextValue(self.write, 0), # Trigger write rd reset
NextValue(writepost, 0), # Trigger write rd reset
# F-Extension
NextValue(fpu_decoder.fready, 0), # Clear ready indication
@ -188,7 +189,7 @@ class Risq5Decoder(Module):
NextValue(self.div_instruction, 4)
)
),
NextValue(self.write, 1), # Trigger write rd
NextValue(writepost, 1), # Trigger write rd
).Elif(regs.op == 0x63, # B-Type
If(((regs.f3 == 0x00) & (regs.xs1s == regs.xs2s)) # beq xs1, xs2, label \
|((regs.f3 == 0x01) & (regs.xs1u != regs.xs2u)) # bne xs1, xs2, label (unsigned) \
@ -257,7 +258,7 @@ class Risq5Decoder(Module):
).Elif(regs.f3 == 0x07, # andi rd, rs1, imm_i
NextValue(regs.rd_wrport.dat_w, regs.xs1u & regs.imm_i),
),
NextValue(self.write, 1), # Trigger write rd
NextValue(writepost, 1), # Trigger write rd
# fence & fence.i memory & i/o read/write ordering observation not implemented/ignored ...
).Elif(regs.op == 0x0F, # I-Type (3) fence/fence.i
NextValue(self.DECODE_state, 0x0F), # No action at all!
@ -280,16 +281,16 @@ class Risq5Decoder(Module):
NextValue(regs.csr[regs.csrindex], regs.rs1), # csr (new value)
),
),
NextValue(self.write, 1), # Trigger write rd
NextValue(writepost, 1), # Trigger write rd
).Elif(regs.op == 0x37, # U-Type (1): lui rd, imm[31:12] (load unsigned int)
NextValue(regs.rd_wrport.dat_w, regs.imm_u << 12), # Shift to upper 12-bit
NextValue(self.write, 1), # Trigger write rd
NextValue(writepost, 1), # Trigger write rd
).Elif(regs.op == 0x17, # U-Type (2) auipc rd, imm[31:12] (Add upper imm. to pc)
NextValue(regs.rd_wrport.dat_w, regs.pc + (regs.imm_u << 12)), # Shift to upper 12-bit
NextValue(self.write, 1), # Trigger write rd
NextValue(writepost, 1), # Trigger write rd
).Elif(regs.op == 0x6F, # J-Type (1) jal rd, offset
NextValue(regs.rd_wrport.dat_w, regs.pc + 4), # x[xd] = pc+4
NextValue(self.write, 1), # Trigger write rd
NextValue(writepost, 1), # Trigger write rd
NextValue(regs.pc, regs.pc - 4 + regs.imm_j), # jump (w/ compensation!)
# Att.: Compensate L1CacheOffset + 1!
If(self.L1BelowJ | self.L1BeyondJ, # Cache exceeded?
@ -300,7 +301,7 @@ class Risq5Decoder(Module):
).Elif(regs.op == 0x67, # I-Type: jalr rd, imm(rs1)
#If(regs.f3 == 0x00,
NextValue(regs.rd_wrport.dat_w, regs.pc + 4), # x[xd] = pc+4
NextValue(self.write, 1), # Trigger write rd
NextValue(writepost, 1), # Trigger write rd
NextValue(regs.pc, (regs.xs1u - 4 + regs.imm_i) & ~1), # jump (w/ compensation!)
# Att.: Compensate L1CacheOffset + 1!
If(self.L1BelowI | self.L1BeyondI, # Cache exceeded?
@ -355,14 +356,14 @@ class Risq5Decoder(Module):
)
).Elif(regs.f7 == 0x60, # fcvt.w
If(regs.rs2 == 0x00, # fcvt.w.s rd, frs1
NextValue(fpu_decoder.fcvt_w_s)
NextValue(fpu_decoder.fcvt_w_s, 1)
).Else( #regs.rs2 == 0x01, # fcvt.wu.s
NextValue(self.DECODE_state, 0x0F), # Dummy action
)
).Elif(regs.f7 == 0x70, # fmv/fclass
If(regs.f3 == 0x00, # fmv.x.s rd, frs1 (f-reg -> x-reg) OK!
NextValue(regs.rd_wrport.dat_w, regs.fs1),
NextValue(self.write, 1), # Trigger write rd (x-reg)
NextValue(writepost, 1), # Trigger write rd (x-reg)
).Else( #regs.f3 == 0x01, # fclass.s
NextValue(self.DECODE_state, 0x0F), # Dummy action
)
@ -446,7 +447,7 @@ class Risq5Decoder(Module):
NextValue(SU_Unit.bEnable.storage, 1), # Start writing to DRAM
NextState("DECODE_WRITEWAIT")
).Else( # No load no more ...
If(self.write & (regs.rd != 0), # Do NOT write to X0 (never! period!)
If(writepost & (regs.rd != 0), # Do NOT write to X0 (never! period!)
NextValue(regs.rd_wrport.we, 1), # Write value enable (delayed)
).Elif(fpu_decoder.fwrite, # F-Extension: frd write
NextValue(regs.frd_wrport.we, 1), # Write float value enable (delayed)
@ -531,7 +532,7 @@ class Risq5Decoder(Module):
If(regs.op == 0x07, # F-Extension: flw ->frd
NextValue(fpu_decoder.fwrite, 1)
).Else( # lb/lh/lw ->rd
NextValue(self.write, 1), # Indicate WRITE (rd) validity (now ready!)
NextValue(writepost, 1), # Indicate WRITE (rd) validity (now ready!)
),
NextState("DECODE_WRITE") # Load delay for opcode cache is next
)

@ -6,6 +6,8 @@ static void start(void)
lui sp,%hi(0x40192000) # Setup stack pointer \n\
addi sp,sp,%lo(0x40192000) # s.a. \n\
repeat: fadd.s f2,f0,f1 # f2 = f0 + f1 \n\
fcvt.w.s x3,f3 # x3 = int(f3) \n\
j repeat # Loop ... \n\
fmin.s f2,f0,f1 # f2 = min(f0,f1) \n\
fmax.s f2,f0,f1 # f2 = max(f0,f1) \n\
fsub.s f2,f0,f1 # f2 = f0 - f1 \n\

Loading…
Cancel
Save