#!/usr/bin/env python3
#
# bfloat16processor.py
#
# bfloat16 processing (1 bit sign, 8 bit exponent, 7 bit mantissa)
#
# History:
# --------
# 22.04.21/KQ Initial version
#
|
|
from migen import *
|
|
from migen.fhdl.specials import Memory
|
|
from litex.soc.interconnect.csr import *
|
|
from litex.soc.integration.doc import AutoDoc, ModuleDoc
|
|
|
|
class bfloat16Processor(Module):
    """
    bfloat16 FPU logic

    Multi-cycle FSM implementing add/sub, mul, div, sqrt (approximation),
    fused multiply-add variants and min/max on bfloat16 values (1 bit sign,
    8 bit exponent, 7 bit mantissa).

    Operands and result live in 32-bit registers using the upper half-word:
    sign in bit 31, biased exponent in bits 23..30, mantissa in bits 16..22.
    Results are written with bits 0..15 cleared (except the sqrt shortcut,
    which maps its 32-bit approximation straight through).

    Handshake: an operation starts in FPU_IDLE when one of the trigger
    signals (fadd, fsub, ...) is set while ``fready`` is low. The FSM sets
    ``fready`` once ``fresult`` is valid; triggers and ``fready`` have to be
    reset externally.
    """
    def __init__(self):
        # Local constants (plain Python ints, folded into the generated logic)
        exp_bias = 127              # Exponent bias
        exp_inf = -128              # Exponent field 0xFF minus bias, wrapped into the 8-bit signed e1/e2/e3
        nan = 0x7FFFFFFF            # NaN pattern recognised (w/o sign) and emitted by this FPU
        sign_bit = 0x80000000

        # Inputs
        self.fs1 = Signal(32, reset_less=True)          # Float register #1
        self.fs2 = Signal(32, reset_less=True)          # Float register #2
        self.fs3 = Signal(32, reset_less=True)          # Float register #3
        # Output
        self.fresult = Signal(32, reset_less=True)      # Float result

        # F-Extension: Job triggers
        self.fadd = Signal()
        self.fsub = Signal()
        self.fmul = Signal()
        self.fdiv = Signal()
        self.fsqrt = Signal()
        self.fmadd = Signal()
        self.fmsub = Signal()
        self.fnmadd = Signal()
        self.fnmsub = Signal()
        self.fmin = Signal()
        self.fmax = Signal()

        self.fready = Signal()                          # Indicate ready

        # Calculation support variables
        self.sign1 = Signal()                           # Sign of floats
        self.sign2 = Signal()
        self.sign3 = Signal()
        self.e1 = Signal((8, True), reset_less=True)    # Signed (unbiased) exponents!
        self.e2 = Signal((8, True), reset_less=True)
        self.e3 = Signal((8, True), reset_less=True)
        self.m1 = Signal((7+2+3, False), reset_less=True)   # Unsigned: 7 bits + hidden 1 + spare + R/G/S bits
        self.m2 = Signal((7+2+3, False), reset_less=True)
        self.m3 = Signal((8+2+3, True), reset_less=True)    # + Sign + R(0)/Guard & Sticky bits
        self.lm3 = Signal((64, True), reset_less=True)      # MUL long result
        self.s32 = Signal((32, True), reset_less=True)      # Signed 32-bit

        self.s_bit = Signal()                           # Sticky bit (for rounding control)
        self.branch1 = Signal()                         # Branch helpers
        self.branch2 = Signal()
        self.i = Signal(4)                              # Loop counter, range 0..15

        FPU_fsm = FSM(reset_state="FPU_IDLE")           # FSM starts idling ...
        self.submodules += FPU_fsm
        self.FPU_state = Signal(9, reset_less=True)     # Debugging support

        FPU_fsm.act("FPU_IDLE",
            NextValue(self.FPU_state, 0),
            If((self.fadd | self.fsub) & ~self.fready,  # Triggers set & ready flag reset externally!
                NextValue(self.sign1, self.fs1[31]),
                NextValue(self.sign2, self.fs2[31] ^ self.fsub),    # Invert sign for subtraction!
                NextValue(self.e1, self.fs1[23:31] - exp_bias),
                NextValue(self.e2, self.fs2[23:31] - exp_bias),
                # Adder layout: R/G/S bits [0:3], mantissa [3:10], hidden one [10], spare [11].
                # The bfloat16 mantissa sits in bits 16..22 of the register (same layout the
                # result is written in), not in bits 0..6.
                NextValue(self.m1, Cat(0, 0, 0, self.fs1[16:23], 1, 0)),
                NextValue(self.m2, Cat(0, 0, 0, self.fs2[16:23], 1, 0)),
                NextState("FADD1")
            ).Elif((self.fmin | self.fmax | self.fmadd | self.fmsub | self.fnmadd | self.fnmsub
                    | self.fmul | self.fdiv) & ~self.fready,        # Triggers set & ready flag reset externally!
                NextValue(self.sign1, self.fs1[31]),
                NextValue(self.sign2, self.fs2[31]),
                NextValue(self.e1, self.fs1[23:31] - exp_bias),
                NextValue(self.e2, self.fs2[23:31] - exp_bias),
                # Multiplier/divider layout: mantissa [0:7], hidden one [7], upper bits zero
                NextValue(self.m1, Cat(self.fs1[16:23], 1, 0, 0, 0, 0)),
                NextValue(self.m2, Cat(self.fs2[16:23], 1, 0, 0, 0, 0)),
                If(self.fdiv,                           # Division
                    NextState("FDIV1"),
                ).Elif(self.fmin,                       # Minimum
                    NextState("FMIN1"),
                ).Elif(self.fmax,                       # Maximum
                    NextState("FMAX1")
                ).Else(                                 # Multiplication variants
                    NextState("FMUL1"),
                )
            ).Elif(self.fsqrt & ~self.fready,           # Trigger set & ready flag reset externally!
                NextValue(self.sign1, self.fs1[31]),
                NextValue(self.e1, self.fs1[23:31] - exp_bias),
                NextValue(self.m1, Cat(self.fs1[16:23], 1, 0, 0, 0, 0)),
                NextState("FSQRT1"),
            )
        )

        # NOTE(review): FADD1 is also entered from FMADD1 for the fused operations. Its
        # infinity/zero shortcuts below still return fs1/fs2 (the multiplication operands)
        # in that case - confirm the intended behaviour for fused special values.
        FPU_fsm.act("FADD1",
            NextValue(self.FPU_state, 1),
            # 1. Verify valid ranges 1st!
            If(((self.fs1[0:31] == nan) | (self.fs2[0:31] == nan))
                | ((self.sign1 ^ self.sign2) & ((self.e1 == exp_inf) & (self.e2 == exp_inf))),  # inf - inf
                NextValue(self.fresult, nan),           # NAN
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif(self.e1 == exp_inf,                  # Infinity
                NextValue(self.fresult, self.fs1),      # Return infinity
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif(self.e2 == exp_inf,                  # Infinity
                NextValue(self.fresult, self.fs2),      # Return infinity
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif(self.fs1[0:31] == 0,                 # Nothing to add? (w/o sign!)
                If(self.fsub,                           # Subtract yields negative result!
                    NextValue(self.fresult, self.fs2 ^ sign_bit),   # Invert sign
                ).Elif(self.fmsub | self.fnmsub,        # 0*x=>0! 0-fs3 = -fs3!
                    NextValue(self.fresult, self.fs3 ^ sign_bit),   # Invert sign
                ).Elif(self.fmadd | self.fnmadd,        # 0*x=>0! 0+fs3 = fs3!
                    NextValue(self.fresult, self.fs3),  # Ready!
                ).Else(                                 # Straight add
                    NextValue(self.fresult, self.fs2),  # Ready!
                ),
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif((self.fadd | self.fsub) & (self.fs2[0:31] == 0),    # Nothing to add? (w/o sign!)
                NextValue(self.fresult, self.fs1),      # Ready!
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif((self.fmadd | self.fmsub | self.fnmadd | self.fnmsub) & (self.fs3[0:31] == 0),
                # Addend is zero (w/o sign!): m2 always carries the hidden one, so the raw
                # register has to be tested. m3/e3/sign3 still hold the product.
                NextState("FRESULT")                    # Just supply result from multiplication!
            ).Else(                                     # Ok, valid floats supplied ...
                NextValue(self.s_bit, 0),
                NextValue(self.branch1, 0),             # Reset helpers
                NextValue(self.branch2, 0),
                NextState("FADD2")
            )
        )
        FPU_fsm.act("FADD2",
            # 2. Compare exponents: The higher one will be taken, the lower one adjusted
            #    (one bit position per clock cycle)
            If(self.e1 < self.e2,
                NextValue(self.FPU_state, 21),
                If(self.m1[0], NextValue(self.s_bit, 1)),   # Keep shifted out bits (ORed sticky bit)
                NextValue(self.m1, self.m1 >> 1),
                NextValue(self.e1, self.e1 + 1),
                NextValue(self.branch1, 1),
            ).Elif(self.e1 > self.e2,
                NextValue(self.FPU_state, 22),
                If(self.m2[0], NextValue(self.s_bit, 1)),   # Keep shifted out bits (ORed sticky bit)
                NextValue(self.m2, self.m2 >> 1),
                NextValue(self.e2, self.e2 + 1),
                NextValue(self.branch2, 1),
            ).Else(
                NextValue(self.FPU_state, 23),
                If(self.branch1, NextValue(self.m1, self.m1 | self.s_bit)),    # Add sticky bit (if any)
                If(self.branch2, NextValue(self.m2, self.m2 | self.s_bit)),
                NextState("FADD3")
            )
        )
        FPU_fsm.act("FADD3",
            NextValue(self.FPU_state, 3),
            # 3. Add mantissas (as both are of same base now), signs negotiated -> ADD/SUB
            If(~self.sign1 & ~self.sign2,
                NextValue(self.m3, self.m1 + self.m2)
            ).Elif(self.sign1 & ~self.sign2,
                NextValue(self.m3, self.m2 - self.m1)
            ).Elif(~self.sign1 & self.sign2,
                NextValue(self.m3, self.m1 - self.m2)
            ).Else(
                NextValue(self.m3, -(self.m1 + self.m2))
            ),
            NextState("FADD4")
        )
        FPU_fsm.act("FADD4",
            NextValue(self.FPU_state, 4),
            If(self.m3 == 0,
                # Exact cancellation (e.g. x - x): normalisation could never find a leading
                # one and would emit 2^(e-7), so return zero right away.
                NextValue(self.fresult, 0),
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Else(
                # 4. Retrieve sign & unsigned absolute value
                If(self.m3 < 0,
                    NextValue(self.sign3, 1),           # Pull sign
                    NextValue(self.m3, -self.m3)        # Absolute value pick
                ).Else(                                 # m3 positive anyway
                    NextValue(self.sign3, 0),           # Remember ...
                ),
                NextValue(self.e3, self.e1),            # Starter value (e1/e2 are the same by now ...)
                NextState("FADD5")
            )
        )
        # NOTE(review): the R/G/S bits are dropped before normalisation, so a difference that
        # only shows up in those bits still ends as 2^(e-7) - confirm acceptable precision.
        FPU_fsm.act("FADD5",
            NextValue(self.FPU_state, 5),
            # 5. Rounding: round up only if all of R/G/S are set
            If(self.m3[0:3] == 0x7,                     # REMAINDER(0) + GUARD(MSB) + STICKYBIT (ORed rest)
                NextValue(self.s_bit, 1)                # Indicate rounding
            ).Else(
                NextValue(self.s_bit, 0),               # Reset otherwise
            ),
            NextValue(self.m3, self.m3 >> 3),           # Remove R/G/S bits
            NextState("FADD6")
        )
        FPU_fsm.act("FADD6",
            NextValue(self.FPU_state, 6),
            # 6. Normalization of result: Overflow (bit above the hidden one set)
            If(self.m3[7+1],
                NextValue(self.m3, self.m3 >> 1),       # Adjust mantissa & increment exponent
                NextValue(self.e3, self.e3 + 1)
            ).Else(
                NextValue(self.i, 0),                   # Reset for normalization restraining
                NextState("FADD7")
            )
        )
        FPU_fsm.act("FADD7",
            # 7. Normalization: Result
            NextValue(self.FPU_state, 7),
            If(~self.m3[7] & (self.i < 7),              # Hidden one missing (limit to max. loops)
                NextValue(self.m3, self.m3 << 1),       # Subtraction normalization
                NextValue(self.e3, self.e3 - 1),
                NextValue(self.i, self.i + 1),          # Count loops ...
            ).Else(
                If(self.s_bit,                          # Do we need rounding?!
                    NextValue(self.m3, self.m3 + self.s_bit),
                    NextState("FADD8")                  # Adjust possible overflow ...
                ).Else(                                 # Nope, all ready
                    NextState("FRESULT")
                )
            )
        )
        FPU_fsm.act("FADD8",
            NextValue(self.FPU_state, 8),
            If(self.m3[7+1],                            # Overflow after rounding?
                NextValue(self.m3, self.m3 >> 1),       # Adjust mantissa & increment exponent
                NextValue(self.e3, self.e3 + 1)
            ),
            NextState("FRESULT")
        )   # End of fadd.s processing

        FPU_fsm.act("FRESULT",                          # Result construction
            NextValue(self.FPU_state, 9),
            # Build the actual resulting float: 16 zero bits, 7 bit mantissa, biased exponent, sign.
            # The exponent sum is sliced to 8 bits explicitly: Migen sizes (e3 + 127) as 9 bits,
            # which would otherwise push the sign out of the 32-bit result unless the backend
            # happens to size the sum as 8 bits.
            NextValue(self.fresult, Cat(Constant(0, 16), self.m3[0:7], (self.e3 + exp_bias)[0:8], self.sign3)),
            NextValue(self.fready, 1),                  # Indicate ready to main decoder
            NextState("FPU_IDLE")
        )

        # NOTE(review): for the fused operations a zero multiplicand returns 0 here instead of
        # +/-fs3, and the infinity shortcuts return fs1/fs2 with their own sign - confirm.
        FPU_fsm.act("FMUL1",
            NextValue(self.FPU_state, 1),
            # 0. Verify valid ranges 1st!
            If((self.fs1[0:31] == nan) | (self.fs2[0:31] == nan),
                NextValue(self.fresult, nan),           # NAN
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif(self.e1 == exp_inf,                  # Infinity
                NextValue(self.fresult, self.fs1),      # Return infinity
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif(self.e2 == exp_inf,                  # Infinity
                NextValue(self.fresult, self.fs2),      # Return infinity
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif((self.fs1[0:31] == 0) | (self.fs2[0:31] == 0),  # Nothing to multiply? (w/o sign!)
                NextValue(self.fresult, 0),             # Result will be zero ...
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Else(                                     # Ok, valid floats supplied ...
                NextValue(self.sign3, self.sign1 ^ self.sign2),    # 1. Calculate result sign
                NextValue(self.e3, self.e1 + self.e2),             # 2. Calculate resulting exponent (add!)
                NextValue(self.lm3, self.m1 * self.m2),            # 3. Significands multiplication (8 x 8 -> 16 bits)
                NextState("FMUL2")
            )
        )
        # Product layout (two 1.7 fixed-point significands -> 2.14 fixed point):
        #   bit 15: overflow (product >= 2.0), bit 14: hidden one,
        #   bits 7..13: result mantissa, bit 6: remainder MSB, bits 0..5: sticky rest
        FPU_fsm.act("FMUL2",
            NextValue(self.FPU_state, 2),
            # 4. MSB set in significands (i.e. bit 15)?
            If(self.lm3[15],
                NextValue(self.lm3, self.lm3 >> 1),     # Normalize result: Overflow
                NextValue(self.e3, self.e3 + 1),
            ),
            If(self.fmul,                               # Regular multiplication
                NextState("FMUL3")                      # Do the rounding!
            ).Else(                                     # Fused multiply/add? W/O rounding!
                NextState("FMUL5")
            )
        )
        FPU_fsm.act("FMUL3",
            # 5. Rounding: RESULTBIT(0) + REMAINDERBIT(MSB) set and sticky rest (ORed) non-zero?
            If(self.lm3[6] & self.lm3[7] & (self.lm3[0:6] != 0),
                NextValue(self.lm3, (self.lm3 & 0x7F80) + 0x80),   # Drop remainder, add one LSB
                NextState("FMUL4")
            ).Else(
                NextState("FMUL5")
            )
        )
        FPU_fsm.act("FMUL4",
            # Overflow normalization (rounding may have carried into bit 15)
            If(self.lm3[15],
                NextValue(self.lm3, self.lm3 >> 1),     # Normalize result: Overflow
                NextValue(self.e3, self.e3 + 1)
            ),
            NextState("FMUL5")
        )
        FPU_fsm.act("FMUL5",
            # 6. Construction of result: 7 mantissa bits below the hidden one
            NextValue(self.m3, (self.lm3 >> 7) & 0x7F),
            If(self.fmul,                               # Simple multiplication
                NextState("FRESULT")
            ).Else(                                     # Fused multiply-add?
                # NOTE(review): both f<n>xxx variants negate the product and both xxsub
                # variants subtract fs3 (see FMADD1); RISC-V defines fnmsub as -(a*b)+c and
                # fnmadd as -(a*b)-c - confirm against the main decoder.
                NextValue(self.sign3, self.sign3 ^ (self.fnmadd | self.fnmsub)),   # Negate mult. result w/ f<n>xxx
                NextState("FMADD1")
            )
        )   # End of fmul.s processing
        FPU_fsm.act("FMADD1",
            # Feed product (as operand 1) and fs3 (as operand 2) into the adder
            NextValue(self.sign1, self.sign3),
            NextValue(self.sign2, self.fs3[31] ^ (self.fmsub | self.fnmsub)),      # Invert sign for subtraction!
            NextValue(self.e1, self.e3),
            NextValue(self.e2, self.fs3[23:31] - exp_bias),
            # Same 12-bit adder layout as in FPU_IDLE (R/G/S, mantissa, hidden one, spare);
            # any further leading zeros would shift the value out of m1/m2.
            NextValue(self.m1, Cat(0, 0, 0, self.m3[0:7], 1, 0)),
            NextValue(self.m2, Cat(0, 0, 0, self.fs3[16:23], 1, 0)),
            NextState("FADD1")
        )

        # Sign of a quotient; sign3 itself is only loaded on the regular division path
        div_sign = self.sign1 ^ self.sign2
        FPU_fsm.act("FDIV1",
            NextValue(self.FPU_state, 1),
            # 0. Verify valid ranges 1st!
            If((self.fs1[0:31] == nan) | (self.fs2[0:31] == nan)
                | ((self.fs1[0:31] == 0) & (self.fs2[0:31] == 0))      # 0 / 0
                | ((self.e1 == exp_inf) & (self.e2 == exp_inf)),        # inf / inf
                NextValue(self.fresult, nan),           # NAN
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif(self.e1 == exp_inf,                  # Infinity / x
                NextValue(self.fresult, Cat(self.fs1[0:31], div_sign)),    # Return infinity
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif(self.e2 == exp_inf,                  # x / Infinity
                If(self.fs2[0:23] == 0,
                    NextValue(self.fresult, Cat(Constant(0, 31), div_sign)),   # Signed zero
                ).Else(                                 # Exponent all ones, fraction set: pass on
                    NextValue(self.fresult, self.fs2),
                ),
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif(self.fs2[0:31] == 0,                 # Division by zero? (w/o sign!)
                NextValue(self.fresult, Cat(Constant(0x7F800000, 31), div_sign)),  # +/- Infinity
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif(self.fs1[0:31] == 0,                 # Nothing to divide? (w/o sign!)
                # m1 always carries the hidden one, so zero must be caught here
                NextValue(self.fresult, Cat(Constant(0, 31), div_sign)),   # Signed zero
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Else(                                     # Ok, valid floats supplied ...
                NextValue(self.sign3, div_sign),                   # 1. Calculate result sign
                NextValue(self.e3, self.e1 - self.e2),             # 2. Calculate resulting exponent (subtract!)
                NextValue(self.m3, 0),                             # 3. Significand preparation
                NextValue(self.i, 0),                              # Loop counter
                NextState("FDIV2")
            )
        )
        FPU_fsm.act("FDIV2",
            If(self.i < 8,                              # One quotient bit per clock cycle
                NextValue(self.FPU_state, 2),
                If(self.m1 < self.m2,
                    NextValue(self.m3, self.m3 << 1),   # Append a zero
                    NextValue(self.m1, self.m1 << 1),
                ).Else(                                 # Append a one
                    NextValue(self.m3, (self.m3 << 1) | 1),
                    NextValue(self.m1, (self.m1 - self.m2) << 1),
                ),
                NextValue(self.i, self.i + 1)
            ).Else(                                     # Loop exceeded
                # 4. Normalization
                NextValue(self.FPU_state, 3),
                If(~self.m3[7],                         # Hidden one missing
                    NextValue(self.m3, self.m3 << 1),   # Subtraction normalization
                    NextValue(self.e3, self.e3 - 1),
                ).Else(
                    NextState("FRESULT")
                )
            )
        )   # End of fdiv.s processing

        # NOTE(review): zero is not special-cased here (sqrt(0) runs through the approximation)
        # and the result may carry non-zero bits in the lower half-word - confirm with consumer.
        FPU_fsm.act("FSQRT1",
            NextValue(self.FPU_state, 1),
            # 1. Verify valid ranges 1st!
            If((self.fs1[0:31] == nan) | self.sign1,
                NextValue(self.fresult, nan),           # NAN
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Elif(self.e1 == exp_inf,                  # Infinity
                NextValue(self.fresult, self.fs1),      # Return infinity
                NextValue(self.fready, 1),
                NextState("FPU_IDLE")
            ).Else(     # Better fast, than accurate! Use Newton-Raphson in S/W for better accuracy!
                # Bit-pattern approximation, error varies (see below)
                If((self.m1[0:7] != 0) | (self.e1 == 1),    # Not 2^x (m==0!) and x!=1
                    NextValue(self.branch1, 1),         # Use 0x0004B0D2 for minimized error (<= 3.5%)
                ).Else(
                    NextValue(self.branch1, 0),         # Use 0x00000000, only for 2^x exact, others up to ~6% error
                ),
                NextValue(self.s32, self.fs1),          # Pick up float value for manipulation
                NextState("FSQRT2")
            )
        )
        FPU_fsm.act("FSQRT2",
            NextValue(self.FPU_state, 2),
            # 1 << 23   Subtract 2^m.
            # >> 1      Divide by 2.
            # 1 << 29   Add ((b + 1) / 2) * 2^m.
            If(self.branch1,
                NextValue(self.s32, ((self.s32 - 0x00800000) >> 1) + (0x20000000 - 0x0004B0D2)),   # Error minimizer term!
            ).Else(
                NextValue(self.s32, ((self.s32 - 0x00800000) >> 1) + 0x20000000),
            ),
            NextState("FSQRT3")
        )
        FPU_fsm.act("FSQRT3",
            NextValue(self.FPU_state, 3),
            NextValue(self.fresult, self.s32),          # Just map value straight ...
            NextValue(self.fready, 1),                  # Indicate ready to main decoder
            NextState("FPU_IDLE")
        )   # End of fsqrt.s processing

        # Shared min/max decision. The magnitude is compared on the raw register bits
        # (exponent above mantissa), which also orders infinity above all finite values;
        # the wrapped signed exponents e1/e2 would sort it below them.
        # NOTE(review): NaN operands are not special-cased for min/max.
        fs1_mag_smaller = self.fs1[0:31] < self.fs2[0:31]
        fs1_is_min = Mux(self.sign1 ^ self.sign2,
            self.sign1,                                 # Sign mismatch? The negative one is smaller
            fs1_mag_smaller ^ self.sign1)               # Same sign: smaller magnitude, inverted if negative

        FPU_fsm.act("FMIN1",
            NextValue(self.fresult, Mux(fs1_is_min, self.fs1, self.fs2)),  # Just map value straight ...
            NextValue(self.fready, 1),                  # Indicate ready to main decoder
            NextState("FPU_IDLE")
        )   # End of fmin.s processing

        FPU_fsm.act("FMAX1",
            NextValue(self.fresult, Mux(fs1_is_min, self.fs2, self.fs1)),  # Just map value straight ...
            NextValue(self.fready, 1),                  # Indicate ready to main decoder
            NextState("FPU_IDLE")
        )   # End of fmax.s processing