master
kaqu 1 year ago
parent 9dc25b43d7
commit a71d4f908e
  1. 30
      software/source/bfloat16nnlib.c

@ -50,7 +50,7 @@ extern int key_eval(void);
#define DRAMDATASIZE2 DRAMDATASIZE1
static uint32_t fpgastate, fpustates;
static int fpgaload(uint32_t *mempt1, uint32_t *mempt2, int16_t calclen, int bReload1, int bReload2)
static int fpgaload(uint32_t *mempt1, uint32_t *mempt2, uint16_t calclen, int bReload1, int bReload2)
{
uint32_t *sentinel1 = (uint32_t *)(DRAMDATABASE1 + ((DRAMDATASIZE1-1) * sizeof(uint32_t)));
uint32_t *sentinel2 = (uint32_t *)(DRAMDATABASE2 + ((DRAMDATASIZE2-1) * sizeof(uint32_t)));
@ -62,7 +62,7 @@ static int fpgaload(uint32_t *mempt1, uint32_t *mempt2, int16_t calclen, int bRe
bfloat16nn_b32Sentinel1_write(*sentinel1);
bfloat16nn_b32Sentinel2_write(*sentinel2);
flush_l2_cache(); // Strictly nec. for longer transfers
bfloat16nn_b10ArrayWordLen_write(calclen); // Indicate array length for calc.
bfloat16nn_b10ArrayWordLen_write(calclen>>1); // Indicate array length for calc. but honour split!
bfloat16nn_b32DRAMLoadAddress1_write((uint32_t)mempt1); // Indicate memory to load from
bfloat16nn_b32DRAMLoadAddress2_write((uint32_t)mempt2); // Indicate memory to load from
bfloat16nn_bReload1_write(bReload1 ? 1 : 0); // Reload mem#1
@ -113,14 +113,14 @@ void dumpfloat(float f)
int key_eval(void)
{
extern void printf1(const char *fmt, float f1);
uint32_t *ui32ptr1, *ui32ptr2;
//uint32_t *ui32ptr1, *ui32ptr2;
uint16_t *ui16ptr1, *ui16ptr2;
float fpResult1, fpResult2;
uint32_t starttime;
uint32_t deltatime;
int i;
#define MAXCALCLEN 4 //784 //16 OK
#define MAXCALCLEN 784 //784 //16 OK
switch(kbhit()) {
case 'r': // Reload
@ -156,32 +156,22 @@ int key_eval(void)
printf("S/W Delta t: %dms ", deltatime);
printf1("\t\t\tS/W SUM=%8.4f\n", sum);
// Calculation fails if the memory is not cleared first -- reason unknown???
for(i=0, ui32ptr1 = (uint32_t *)DRAMDATABASE1;i<DRAMDATASIZE1;i++) // Setup test data
*ui32ptr1++ = 0; // Clear all memory ...
for(i=0, ui32ptr2 = (uint32_t *)DRAMDATABASE2;i<DRAMDATASIZE2;i++) // Setup test data
*ui32ptr2++ = 0; // Clear all memory ...
ui16ptr1 = (uint16_t *)DRAMDATABASE1; // Absolute: bytes! Matrice/row
for(i=1;i<=MAXCALCLEN+1;i++)
for(i=1;i<=MAXCALCLEN;i++)
*ui16ptr1++ = f2ui16(1.0 * (float)i );
ui16ptr2 = (uint16_t *)DRAMDATABASE2; // Absolute: bytes! Vector
for(i=1;i<=MAXCALCLEN+1;i++)
for(i=1;i<=MAXCALCLEN;i++)
*ui16ptr2++ = f2ui16(1.0 * (float)i );
// BOTH: 1*1+2*2+3*3+4*4 (+5*5) = 1+4+9+16 (+25) = 30 (+25) = 55 WRONG!
// FPU#1: 1*1 +3*3 = 1+9 = 10 OK
// FPU#2: 2*2 +4*4 (+5*5)= 4+16(+5*5) = 20(+25) = 45 WRONG!
starttime = systime(0);
if(fpgaload((uint32_t *)DRAMDATABASE1, (uint32_t *)DRAMDATABASE2, MAXCALCLEN, 1, 1)) { // 800*32-bit=3200 bytes, 400 Words/FPU to calc.
if(fpgaload((uint32_t *)DRAMDATABASE1, (uint32_t *)DRAMDATABASE2, MAXCALCLEN, 1, 1)) {
deltatime = systime(0)-starttime;
printf("H/W Delta t: %dms ", deltatime);
fpResult1 = fpResult1_read();
fpResult2 = fpResult2_read();
printf("(S=%04Xh: FS=%04Xh)", fpgastate, fpustates);
printf1("\tH/W SUM=%8.4f", fpResult1);
printf1("\t(FPU#2=%8.4f)\n", fpResult2);
printf1("\tH/W SUM=%8.4f\n", fpResult1);
//printf1("\t(FPU#2=%8.4f)\n", fpResult2);
}
else {
printf("CURRENT TIMEOUT: S=%04Xh: FS=%04Xh ", fpgastate, fpustates);

Loading…
Cancel
Save