299 lines
14 KiB
Python
Executable File
299 lines
14 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
#
|
|
# bfloat16nn.py
|
|
#
|
|
# This file has been derived from LiteX-Boards/colorlight_5b_75x.py
|
|
# Copyright (c) 2020 Florent Kermarrec <florent@enjoy-digital.fr>
|
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
#
|
|
# Disclaimer: Still a proof of concept with large timing violations on the IP/UDP and
|
|
# Etherbone stack that need to be optimized. It was initially just used to validate the reversed
|
|
# pinout but happens to work on hardware...
|
|
#
|
|
# History:
|
|
# --------
|
|
# 21.04.21/KQ Initially derived version
|
|
#
|
|
# Build/Use ----------------------------------------------------------------------------------------
|
|
# - 'python3 bfloat16nn.py --build --revision=7.0 --uart-name=crossover --with-etherbone --ip-address=192.168.1.20 --csr-csv=build/csr.csv'
|
|
# to generate
|
|
# - 'python3 bfloat16nn.py --load' to download to FPGA
|
|
# - 'ping 192.168.1.20' to verify ethernet connection - via LEFT(!) RJ45 port
|
|
# - 'wishbone-tool --ethernet-host 192.168.1.20 --server terminal --csr-csv build/csr.csv'
|
|
# You should see the LiteX BIOS and be able to interact with it
|
|
# - To load a file to RAM (@0x40000000 len=0x400000) use:
|
|
# wishbone-tool --ethernet-host 192.168.1.20 --server load-file --csr-csv build/csr.csv
|
|
# --load-address 0x40000000
|
|
# --load-name build/colorlight_5a_75b/software/<filename>
|
|
# To disassemble raw file:
|
|
# ../fpga/litex/riscv64-unknown-elf-gcc-8.3.0-2019.08.0-x86_64-linux-ubuntu14/bin/riscv64-unknown-elf-objdump
|
|
# -D -b binary ./build/colorlight_5a_75b/software/bios/bios.bin -m riscv
|
|
#
|
|
|
|
import os
|
|
import argparse
|
|
import sys
|
|
import time
|
|
|
|
from migen import *
|
|
from migen.genlib.misc import WaitTimer
|
|
from migen.genlib.resetsync import AsyncResetSynchronizer
|
|
|
|
from litex.build.io import DDROutput
|
|
|
|
from litex_boards.platforms import colorlight_5a_75b
|
|
|
|
from litex.build.lattice.trellis import trellis_args, trellis_argdict
|
|
|
|
from litex.soc.cores.clock import *
|
|
from litex.soc.cores.spi_flash import ECP5SPIFlash
|
|
from litex.soc.cores.gpio import GPIOOut
|
|
from litex.soc.integration.soc_core import *
|
|
from litex.soc.integration.builder import *
|
|
|
|
from litex.soc.interconnect.csr import AutoCSR, CSRStatus, CSRStorage, CSRField
|
|
from litex.soc.interconnect.stream import SyncFIFO
|
|
|
|
from litedram.modules import M12L16161A, M12L64322A
|
|
from litedram.phy import GENSDRPHY, HalfRateGENSDRPHY
|
|
from litedram.frontend.dma import LiteDRAMDMAReader, LiteDRAMDMAWriter
|
|
|
|
from liteeth.phy.ecp5rgmii import LiteEthPHYRGMII
|
|
|
|
from litex.build.generic_platform import *
|
|
|
|
import litex.soc.doc as lxsocdoc
|
|
|
|
from libmodules.dramtransfer import DRAM2FPGA, FPGA2DRAM
|
|
from libmodules.systime import SysTime
|
|
from libmodules.bfloat16nncore import bfloat16NeuralNetworkCore
|
|
|
|
from helpers.prepare_firmware import copyjob
|
|
|
|
# CRG ----------------------------------------------------------------------------------------------
|
|
class _CRG(Module):
    """Clock and reset generator.

    Creates the ``sys`` clock domain plus a phase-shifted companion domain used
    to clock the SDRAM chip (``sys_ps`` for full rate, ``sys2x``/``sys2x_ps``
    for half rate), all driven from one ECP5 PLL. Optionally adds a second PLL
    providing 12 MHz and 48 MHz USB clock domains.

    Parameters
    ----------
    platform:
        Board platform the clock, button and SDRAM clock pads are requested from.
    sys_clk_freq:
        Target frequency of the ``sys`` clock domain.
    use_internal_osc:
        When true, the ``OSCG`` primitive feeds the PLL instead of the
        ``clk25`` pad.
    with_usb_pll:
        When true, the ``usb_12`` / ``usb_48`` clock domains are generated.
    with_rst:
        When true, ``user_btn_n`` (index 0) is used as active-low PLL reset.
    sdram_rate:
        ``"1:2"`` selects the half-rate clocking scheme; any other value
        selects the full-rate scheme.
    """

    def __init__(self, platform, sys_clk_freq, use_internal_osc=False, with_usb_pll=False, with_rst=True, sdram_rate="1:1"):
        half_rate = sdram_rate == "1:2"

        self.clock_domains.cd_sys = ClockDomain()
        if half_rate:
            self.clock_domains.cd_sys2x = ClockDomain()
            self.clock_domains.cd_sys2x_ps = ClockDomain(reset_less=True)
        else:
            self.clock_domains.cd_sys_ps = ClockDomain(reset_less=True)

        # # #

        # Reference clock: internal oscillator or the 25 MHz board clock.
        if use_internal_osc:
            clk = Signal()
            osc_div = 5
            self.specials += Instance("OSCG", p_DIV=osc_div, o_OSC=clk)
            clk_freq = 310e6/osc_div  # Oscillator base frequency divided by DIV.
        else:
            clk = platform.request("clk25")
            clk_freq = 25e6

        # Active-low reset source: the user button, or the constant 1 (never asserted).
        if with_rst:
            rst_n = platform.request("user_btn_n", 0)
        else:
            rst_n = 1

        # Main PLL
        self.submodules.pll = pll = ECP5PLL()
        self.comb += pll.reset.eq(~rst_n)
        pll.register_clkin(clk, clk_freq)
        pll.create_clkout(self.cd_sys, sys_clk_freq)
        # Phase-shifted output: ideally 90° but needs to be increased.
        if half_rate:
            pll.create_clkout(self.cd_sys2x, 2*sys_clk_freq)
            pll.create_clkout(self.cd_sys2x_ps, 2*sys_clk_freq, phase=180)
        else:
            pll.create_clkout(self.cd_sys_ps, sys_clk_freq, phase=180)

        # Optional USB PLL (12 MHz / 48 MHz)
        if with_usb_pll:
            self.submodules.usb_pll = usb_pll = ECP5PLL()
            self.comb += usb_pll.reset.eq(~rst_n)
            usb_pll.register_clkin(clk, clk_freq)
            self.clock_domains.cd_usb_12 = ClockDomain()
            self.clock_domains.cd_usb_48 = ClockDomain()
            usb_pll.create_clkout(self.cd_usb_12, 12e6, margin=0)
            usb_pll.create_clkout(self.cd_usb_48, 48e6, margin=0)

        # SDRAM clock: forward the phase-shifted clock to the pad through a DDR output.
        if half_rate:
            sdram_clk = ClockSignal("sys2x_ps")
        else:
            sdram_clk = ClockSignal("sys_ps")
        self.specials += DDROutput(1, 0, platform.request("sdram_clock"), sdram_clk)
|
|
|
|
# BaseSoC ------------------------------------------------------------------------------------------
|
|
class BaseSoC(SoCCore):
    """LiteX SoC for the Colorlight board including the bfloat16NN core.

    On top of ``SoCCore`` this adds: the clock/reset generator, SDR SDRAM
    (unless integrated main RAM is configured), optional Ethernet and/or
    Etherbone on an RGMII PHY, a ``SysTime`` unit, a DRAM->FPGA load unit
    (DMA reader + FIFO + ``DRAM2FPGA``) and the ``bfloat16NeuralNetworkCore``.

    Parameters
    ----------
    board:
        Board name; used for the SoC identifier and the SDRAM chip selection.
    revision:
        Board revision; used for the SDRAM chip selection only.
    with_ethernet / with_etherbone:
        Enable the Ethernet MAC and/or the Etherbone bridge.
    eth_phy:
        Index of the Ethernet PHY whose pads are requested.
    ip_address / mac_address:
        Forwarded to ``add_etherbone``.
    sys_clk_freq:
        System clock frequency.
    use_internal_osc:
        Forwarded to ``_CRG``.
    sdram_rate:
        ``"1:2"`` selects the half-rate SDRAM PHY, otherwise the full-rate PHY.
    **kwargs:
        Forwarded to ``SoCCore.__init__``. ``uart_name``, ``max_sdram_size``,
        ``l2_size`` and ``min_l2_data_width`` are additionally read here.
    """

    def __init__(self, board, revision, with_ethernet=False, with_etherbone=False, eth_phy=0, ip_address=None, mac_address=None, sys_clk_freq=60e6, use_internal_osc=False, sdram_rate="1:1", **kwargs):
        # NOTE(review): the platform revision is fixed to "7.0"; the `revision`
        # argument is not forwarded here - confirm this is intentional.
        platform = colorlight_5a_75b.Platform(revision="7.0")

        # SoCCore ----------------------------------------------------------------------------------
        SoCCore.__init__(self, platform, int(sys_clk_freq),
            ident          = "LiteX SoC on Colorlight " + board.upper(),
            ident_version  = True,
            **kwargs)

        # Resolve the UART name once. Indexing kwargs["uart_name"] directly raised
        # KeyError when the caller relied on SoCCore's own default; fall back to
        # "serial" (assumed to be SoCCore's default - TODO confirm for the LiteX
        # version in use).
        uart_name      = kwargs.get("uart_name", "serial")
        uart_uses_pins = uart_name in ["serial", "bridge"]

        # CRG --------------------------------------------------------------------------------------
        with_rst     = not uart_uses_pins  # serial_rx shared with user_btn_n.
        with_usb_pll = uart_name == "usb_acm"
        self.submodules.crg = _CRG(platform, sys_clk_freq, use_internal_osc=use_internal_osc, with_usb_pll=with_usb_pll, with_rst=with_rst, sdram_rate=sdram_rate)

        # SDR SDRAM --------------------------------------------------------------------------------
        if not self.integrated_main_ram_size:
            sdrphy_cls = HalfRateGENSDRPHY if sdram_rate == "1:2" else GENSDRPHY
            self.submodules.sdrphy = sdrphy_cls(platform.request("sdram"))
            if board == "5a-75e" and revision == "6.0":
                sdram_cls  = M12L64322A
                sdram_size = 0x80000000
            else:
                sdram_cls  = M12L16161A
                sdram_size = 0x40000000
            self.add_sdram("sdram",
                phy                     = self.sdrphy,
                module                  = sdram_cls(sys_clk_freq, sdram_rate),
                origin                  = self.mem_map["main_ram"],
                size                    = kwargs.get("max_sdram_size", sdram_size),
                l2_cache_size           = kwargs.get("l2_size", 8192),
                l2_cache_min_data_width = kwargs.get("min_l2_data_width", 128),
                l2_cache_reverse        = True
            )

        # Ethernet / Etherbone ---------------------------------------------------------------------
        if with_ethernet or with_etherbone:
            self.submodules.ethphy = LiteEthPHYRGMII(
                clock_pads = self.platform.request("eth_clocks", eth_phy),
                pads       = self.platform.request("eth", eth_phy))
            self.add_csr("ethphy")
            if with_ethernet:
                self.add_ethernet(phy=self.ethphy)
            if with_etherbone:
                self.add_etherbone(
                    phy         = self.ethphy,
                    ip_address  = ip_address,
                    mac_address = mac_address,
                )

        # Base counter (used for clocking)
        counter = Signal(32)  # 32-Bit counter
        self.sync += counter.eq(counter + 1)

        # System time (count)
        self.submodules.systime = systime = SysTime(comparecount=0x0000EA90)
        self.add_csr("systime")

        # DRAM access section
        # Depth of both the DMA reader FIFO and the load FIFO, and the load unit's
        # `maxwords` (original note: "Transfer length 32 x 32-bit, FIFO depth
        # (511 L1 cache currently possible = 9-bit!)").
        MAXWORDS = 512
        # Load unit memory access
        self.submodules.dma_reader = dma_reader = LiteDRAMDMAReader(self.sdram.crossbar.get_port(), fifo_depth=MAXWORDS, fifo_buffered=True)
        dma_reader.add_csr()
        self.add_csr("dma_reader")
        # Load unit transfer
        self.submodules.sync_fifo_in = sync_fifo_in = SyncFIFO([("data", 32)], MAXWORDS, True)
        self.comb += dma_reader.source.connect(sync_fifo_in.sink)  # Connect DMA-Reader.source -> FIFO.sink
        # Load unit (LU)
        self.submodules.dram2fpga = dram2fpga = DRAM2FPGA(maxwords=MAXWORDS, dma_reader=dma_reader, sync_fifo=sync_fifo_in)
        self.add_csr("dram2fpga")

        # *** Store unit: not used currently! Kept for reference. ***
        # MAXWRITEWORDS = 1 # Transfer length 1 x 32-bit = 4 byte maximum (SU)
        # # Store unit memory access
        # self.submodules.dma_writer = dma_writer = LiteDRAMDMAWriter(self.sdram.crossbar.get_port(), fifo_depth=MAXWRITEWORDS, fifo_buffered=True)
        # dma_writer.add_csr()
        # self.add_csr("dma_writer")
        # # Store unit transfer
        # self.submodules.sync_fifo_out = sync_fifo_out = SyncFIFO([("data", 32)], MAXWRITEWORDS, True)
        # self.comb += sync_fifo_out.source.connect(dma_writer.sink) # Connect FIFO.source -> DMA-Writer.sink
        # # Store unit (SU)
        # self.submodules.fpga2dram = fpga2dram = FPGA2DRAM(dma_writer=dma_writer, sync_fifo=sync_fifo_out)
        # self.add_csr("fpga2dram")

        # Integrate bfloat16NN processor
        RAMWAITTIME = 1  # Minimum wait!
        self.submodules.bfloat16nn = bfloat16nn = bfloat16NeuralNetworkCore(
            RAMWaitTime = RAMWAITTIME,
            LUCacheSize = MAXWORDS,
            LoadUnit    = dram2fpga,
            StoreUnit   = None,  # *** Not used currently: fpga2dram,
        )
        self.add_csr("bfloat16nn")

        # USERLED (on-board LED, active low) shows the core's bReady flag.
        # Only requested when the UART is neither "serial" nor "bridge"
        # (e.g. uart-name=crossover); presumably the LED pad is shared with
        # the serial pins - verify against the platform file.
        if not uart_uses_pins:
            self.comb += platform.request("user_led_n").eq(~(bfloat16nn.bReady))
|
|
|
|
# Build --------------------------------------------------------------------------------------------
|
|
def main():
    """Parse the command line, assemble the SoC, then build/load/flash it.

    Steps, in order:
      1. Register the LiteX builder, SoC-core and Trellis options plus the
         board specific options, and parse them.
      2. Instantiate ``BaseSoC`` and attach the SPI flash.
      3. Run the builder (the toolchain itself only runs with ``--build``).
      4. Optionally generate Sphinx documentation sources (``--doc``).
      5. Optionally load the bitstream (``--load``) or convert it and write
         it to flash (``--flash``). ``--load`` returns before ``--flash`` is
         evaluated, so only one of the two is carried out per run.
    """
    parser = argparse.ArgumentParser(description="LiteX SoC on Colorlight 5A-75X")
    builder_args(parser)
    soc_core_args(parser)
    trellis_args(parser)
    parser.add_argument("--build",            action="store_true",                 help="Build bitstream")
    parser.add_argument("--load",             action="store_true",                 help="Load bitstream")
    parser.add_argument("--board",            default="5a-75b",                    help="Board type: 5a-75b (default) & don't change!")
    parser.add_argument("--revision",         default="7.0", type=str,             help="Board revision 7.0 (default) & don't change!")
    parser.add_argument("--with-ethernet",    action="store_true",                 help="Enable Ethernet support")
    parser.add_argument("--with-etherbone",   action="store_true",                 help="Enable Etherbone support")
    parser.add_argument("--eth-phy",          default=0, type=int,                 help="Ethernet PHY 0 or 1 (default=0)")
    parser.add_argument("--ip-address",       default="192.168.1.50",              help="Ethernet IP address of the board.")
    parser.add_argument("--mac-address",      default="0x726b895bc2e2",            help="Ethernet MAC address of the board.")
    parser.add_argument("--sys-clk-freq",     default=60e6, type=float,            help="System clock frequency (default=60MHz)")
    parser.add_argument("--use-internal-osc", action="store_true",                 help="Use internal oscillator")
    parser.add_argument("--sdram-rate",       default="1:1",                       help="SDRAM Rate 1:1 Full Rate (default), 1:2 Half Rate")
    parser.add_argument("--csr_csv",          default="build/csr.csv",             help="CSR list location")
    parser.add_argument("--doc",              action="store_true",                 help="Create doc files for sphinx generator")
    parser.add_argument("--flash",            action="store_true",                 help="Load bitstream to flash")
    # "--csr_csv" shares its destination (csr_csv) with the "--csr-csv" option
    # registered by builder_args(). argparse only applies the default of the
    # first action registered for a destination, so the default given above
    # would be ignored. set_defaults() updates every action with that
    # destination and therefore makes "build/csr.csv" effective.
    parser.set_defaults(csr_csv="build/csr.csv")
    args = parser.parse_args()

    #assert not (args.with_ethernet and args.with_etherbone)
    soc = BaseSoC(board=args.board, revision=args.revision,
        with_ethernet    = args.with_ethernet,
        with_etherbone   = args.with_etherbone,
        eth_phy          = args.eth_phy,
        ip_address       = args.ip_address,
        mac_address      = int(args.mac_address, 0),  # Base 0: accepts 0x... notation.
        sys_clk_freq     = args.sys_clk_freq,
        use_internal_osc = args.use_internal_osc,
        sdram_rate       = args.sdram_rate,
        **soc_core_argdict(args))

    # 32MBit SPIFlash ------------------------------------------------------------------------
    flashbase    = 0xc0000000
    flashoffset  = 0x100000                  # Used to be zero (default)
    boot_address = flashbase + flashoffset   # 0xc0100000, i.e. boot at +1MB
    soc.mem_map["spiflash"] = flashbase      # Length: 0x01000000 ('til 0xc1000000 - 1)
    soc.add_constant("FLASH_BOOT_ADDRESS", boot_address)
    soc.add_spi_flash(name="spiflash", mode="1x", dummy_cycles=8, clk_freq=5e6)

    builder = Builder(soc, **builder_argdict(args))
    # Now override boot address (used to be zero/default); read by trellis_argdict() below.
    args.ecppack_bootaddr = boot_address
    builder.build(**trellis_argdict(args), run=args.build)  # Written here to (local) build tree

    if args.doc:
        print("Generating documentation for sphinx ...")
        # NOTE(review): project_name "neopixelar" looks carried over from another
        # project - confirm before renaming.
        lxsocdoc.generate_docs(soc, "build/documentation/", project_name="neopixelar", author="KQ")
        print("Generate via: 'sphinx-build -b html build/documentation build/documentation/html'")

    if args.load:
        prog = soc.platform.create_programmer()
        prog.load_bitstream(os.path.join(builder.gateware_dir, soc.build_name + ".svf"))
        return

    if args.flash:  # Convert Bit File to Jtag Write Flash command
        name = os.path.join(builder.gateware_dir, soc.build_name)
        print(f"Executing ./bit_to_flash.py {name}.bit {name}.svf.flash")
        from helpers.bit_to_flash import convertBitToFlashFile
        convertBitToFlashFile(name + ".bit", name + ".svf.flash", address=0)
        from helpers.load_to_flash import load2flash
        load2flash(name + ".svf.flash")
        return
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: stage the firmware, run the build flow and report
    # the elapsed wall-clock time in whole minutes.
    starttime = time.time()
    copyjob()   # Create backup if nec. & move our firmware to the correct location
    main()      # Create FPGA & load/flash
    elapsed_seconds = time.time() - starttime
    print("Time used: {0} min.".format(int(elapsed_seconds/60.0)))
|