Result of multiplication is zero. Not so in simulation

stekar · February 15, 2021, 3:12pm

I’ve successfully simulated a cos function with input x0 being the angle 0…2**n. Sector is 0 for 0…pi/2 up to 3 for 3pi/2…2pi

Implementing in hw i found that already my first multiplication on line 31 or 34 did return zero instead of the expected values which i get when simulating:
31 X.next=(x0*piquart)>>n #this results in X=0 for all x0
(I read back internal fpga data via a spi interface)

x0 indeed does a triangle wave 0…2**n

Tried with always_comb and always(‘angle or sector i.e. x0,sector’)
also with Signal or not Signal for the constants but with no success.

Any suggestions?
I can publish the complete set of 4 files if needed, here is just the cos generator:

#unit_cos4.py 10.02.21  sk
#taylor series for cos: 1- 1/2*x**2 + 1/24*x**4 -1/720*x**6
#tested ok with testcase_unit.py
#inputs sector (0..3),  angle x0 (0..2**n)
#output y0 = cos(x0)
from myhdl import *
from math import pi

@block
def unit_cos4(sector,x0,y0,n,y10):
    """Combinational cosine approximation built from a Taylor series.

    Evaluates 1 - 1/2*x**2 + 1/24*x**4 - 1/720*x**6 in fixed point.

    sector -- quadrant selector 0..3 (0 for 0..pi/2 up to 3 for 3pi/2..2pi)
    x0     -- angle inside the sector, 0..2**n
    y0     -- cosine output
    n      -- word length in bits of the internal signals
    y10    -- debug probe output, driven with X minus its least significant bit

    NOTE(review): the products below are shifted directly, without an
    explicitly sized intermediate signal. This simulates as expected, but the
    author reports that the converted design reads back X == 0 on hardware.
    """
    # Fixed-point constants, all n bits wide.
    One=Signal(intbv(2**n-1,min=0,max=2**n))
    piquart=Signal(intbv(int(round((pi/2.0)*2**(n-1))),min=0,max=2**n))
    Inv24=Signal(intbv(int(round((1/24.0)*2**n)),min=0,max=2**n))
    Inv720=Signal(intbv(int(round((1/720.0)*2**n)),min=0,max=2**n))
    # Scaled angle and its even powers.
    X=Signal(intbv(0,min=0,max=2**n))
    X2=Signal(intbv(0,min=0,max=2**n))
    X4=Signal(intbv(0,min=0,max=2**n))
    X6=Signal(intbv(0,min=0,max=2**n))
    # The three Taylor terms that are added to / subtracted from One.
    t1=Signal(intbv(0,min=0,max=2**n))
    t2=Signal(intbv(0,min=0,max=2**n))
    t3=Signal(intbv(0,min=0,max=2**n))
    #y10=Signal(intbv(0,min=-2**n,max=2**n))  #probe for debug only

    @always_comb
    def probe():
        # X[:1] keeps every bit of X except bit 0.
        y10.next=X[:1]           #X is unsigned, y10 is signed. both n bits

    @always(x0,sector)
    def normalize():
        # Sectors 0 and 2 use the angle as is, sectors 1 and 3 mirror it.
        if sector==0 or sector==2:
            X.next=(x0*piquart)>>n        #this results in X=0 for all x0
            #X.next=x0
        else:
            X.next=((One-x0)*piquart)>>n
            #X.next=(One-x0)              #this results in X=0 for all x0

    @always_comb
    def calc0():
        X2.next=(X*X)>>(n+1)  #half x**2

    @always_comb
    def calc1():
        t1.next= X2          #half x**2
        X4.next=(X2*X2)>>(n) #quart x**4

    @always_comb
    def calc2():
        #t2.next=((X4*X4)>>(n))*Inv24)>>(n)
        t2.next=(X4*Inv24)>>(n-2)  # *4 now to compensate
        X6.next=(X4*X2)>>(n) #eighth of x**6

    @always_comb
    def calc3():
        t3.next=(X6*Inv720)>>(n-3) # *8 now to compensate

    '''  clipping output for debug purpose
    @always_comb
    def clipsin():
        if Y10temp>=2**(n-1):
            y0.next=2**(n-1)-1
        elif Y10temp<-2**(n-1):
            y0.next=-2**(n-1)
        else:
            y0.next=Y10temp
    '''

    @always_comb
    def calcsum():
        # Sectors 0 and 3 output the series as is, sectors 1 and 2 negate it.
        if sector==0 or sector==3:
            #y0.next = (One-t1+t2-t3)>>1
            y0.next = (One-t1+t2-t3)>>1 #1
        else:
            #y0.next = (t1-One-t2+t3)>>1
            y0.next = (t1-One-t2+t3)>>1 #1

    return instances()

indent preformatted text by 4 spaces

josyb · February 15, 2021, 8:47pm

Tried dfc.convert(hdl="VHDL", initial_values=True)?
Check the value of the piquart signal.

stekar · February 16, 2021, 7:41am

Ah, josy - I checked piquart to be 25736 (normalized to Q15 after transfer over 16bit spi) which is ok.

But finally I found what can be done:

instead of
X.next=(x0*piquart)>>n #this results in X=0 for all x0
I need to write
X.next=(x0>>5)*(piquart>>6) #for n=11

So the full cos unit looks like this:
(not parameterized in n yet)

#unit_cos4.py 16.02.21  sk
#tested ok with testcase_unit.py
#inputs sector (0..3),  angle x0 (0..2**n)
#output y0 = cos(x0)
from myhdl import *
from math import pi
#taylor series for cos: 1- 1/2*x**2 + 1/24*x**4 -1/720*x**6
@block
def unit_cos4(sector,x0,y0,n,y10):
	"""Cosine approximation with every operand pre-shifted before multiplying.

	Same Taylor series as noted above the block
	(1 - 1/2*x**2 + 1/24*x**4 - 1/720*x**6), but each product is formed from
	operands that were shifted right first, instead of shifting the product.

	sector -- quadrant selector 0..3
	x0     -- angle inside the sector, 0..2**n
	y0     -- cosine output
	n      -- word length; the literal shift amounts below were chosen for
	          n=11 and are not derived from n
	y10    -- debug probe output, mirrors y0
	"""
	smax=2**(n+1)  # internal signals get one bit more than x0
	One=Signal(intbv(2**n-1,min=0,max=2**n)) #note: same width as x0 (2**n)
	piquart=Signal(intbv(int(round((pi/4.0)*smax)),min=0,max=smax))  #note: this is half of pihalf
	Inv24=Signal(intbv(int(round((1/24.0)*smax)),min=0,max=smax))
	Inv720=Signal(intbv(int(round((1/720.0)*smax)),min=0,max=smax))
	# Scaled angle and its even powers.
	X=Signal(intbv(0,min=0,max=smax))
	X2=Signal(intbv(0,min=0,max=smax))
	X4=Signal(intbv(0,min=0,max=smax))
	X6=Signal(intbv(0,min=0,max=smax))
	# The three Taylor terms combined in calcsum.
	t1=Signal(intbv(0,min=0,max=smax))
	t2=Signal(intbv(0,min=0,max=smax))
	t3=Signal(intbv(0,min=0,max=smax))
	#Y10temp=Signal(intbv(0,min=-2**n,max=2**n))  #for debug only

	@always_comb
	def probe():
		y10.next=y0 #piquart>>2    #X[:1]                   #X is unsigned, y10 is signed n bits


	@always(x0,sector)
	def normalize():
		# Sectors 0 and 2 use the angle as is, sectors 1 and 3 mirror it.
		if sector==0 or sector==2:
			#X.next=(x0*piquart)>>n        #>>n instead of (n+1) for doubling piquart to pihalf
			# 5+6 = 11 bits dropped before the multiply (n=11)
			X.next=(x0>>5)*(piquart>>6)
		else:
			#X.next=((One-x0)*piquart)>>n
			X.next=((One-x0)>>5)*(piquart>>6)

	@always_comb
	def calc0():
		#X2.next=(X*X)>>(n+1)  #half x**2
		X2.next=(X>>6)*(X>>6)  #half x**2

	@always_comb
	def calc1():
		t1.next= X2          #half x**2
		#X4.next=(X2*X2)>>(n) #quart x**4
		X4.next=(X2>>6)*(X2>>5)

	@always_comb
	def calc2():
		#t2.next=(X4*Inv24)>>(n-2)  # *4 now to compensate
		t2.next=(X4>>6)*(Inv24>>4)
		#X6.next=(X4*X2)>>(n) #eighth of x**6
		X6.next=(X4>>6)*(X2>>5) #eighth of x**6

	@always_comb
	def calc3():
		#t3.next=(X6*Inv720)>>(n-3) # *8 now to compensate
		t3.next=(X6>>9)*(Inv720)

	'''  clipping output for debug purpose
	@always_comb
	def clipsin():
		if Y10temp>=2**(n-1):
			y0.next=2**(n-1)-1
		elif Y10temp<-2**(n-1):
			y0.next=-2**(n-1)
		else:
			y0.next=Y10temp
	'''

	@always_comb
	def calcsum():
		# Sectors 0 and 3 output the series as is, sectors 1 and 2 negate it.
		if sector==0 or sector==3:
			#y0.next = (One-t1+t2-t3)>>1
			y0.next = (One-t1+t2-t3)>>1
		else:
			#y0.next = (t1-One-t2+t3)>>1
			y0.next = (t1-One-t2+t3)>>1

	return instances()

I connected my raspberry pi with its spi interface to my tinyfpga from which i use the recommended yosys toolchain in atom.

Synchronizing to x0 (8kHz) i expect a 50Hz cos and sine to look like the red and the blue curves:
(x-axis is time in ms)

figure_1

The resulting cos data read from fpga shows some jitter which i expect to be from the raspi read timing and not from precision of calculation.

I’d like to comment on this that i expected the multiplication to work the same in simulation as in the resulting verilog. I would consider this a bug to be worth some effort for somebody who knows how to do it.

DrPi · February 16, 2021, 2:39pm

It is important to understand that MyHDL is both a simulation tool and a converter tool. You can simulate non-convertible code. If you want to be sure the converted code is correct, you have to use co-simulation.

What do you think this code should convert to ?
What should the resulting size of x0*piquart be ?
If you want to get control on this, you have to use a variable to store the intermediate result. This will give the converter a direction for the size of the multiplication result.
This problem is not specific to MyHDL. You get the same problem with legacy HDL languages. Except that with VHDL, the synthesizer will certainly complain (I don’t know for verilog).

stekar · February 16, 2021, 6:29pm

Yes, i also came to the conclusion that it is necessary to code in more details.

In python though the precedence of operators is given. And in addition i used brackets to make it more clear what i want: x0*piquart should yield an intermediate result whose length is the sum of the lengths of both operands. Right? Then shift right n bits and the result does fit into X.

I think this is what python does but not so the converter.
(And i think it would be nice both did the same thing)

But nevermind - i find myhdl is great - just many things to learn.

I now introduced a intermediate intbv variable containing the full result of the multiplication.
Then i shift as appropriate and assign to the Signal.

This is the cos unit as it works now and might be useful for others maybe:
(note that you have to keep track of the sector outside the routine. By choosing the respective sector you can calculate a sin or a cos).

#unit_cos5.py 17.02.21  sk
#tested ok with testcase_unit.py > improved resolution
#inputs sector (0..3),  angle x0 (0..2**n)
#output y0 = cos(x0)
from myhdl import *
from math import pi
#taylor series for cos: 1- 1/2*x**2 + 1/24*x**4 -1/720*x**6
@block
def unit_cos5(sector,x0,y0,n):
	"""Cosine approximation that shifts each product via an intermediate value.

	Taylor series: 1 - 1/2*x**2 + 1/24*x**4 - 1/720*x**6.

	sector -- quadrant selector 0..3
	x0     -- angle inside the sector, 0..2**n
	y0     -- cosine output
	n      -- word length in bits

	NOTE(review): the author later reported in the thread that this version
	does not work. The only code change made here is the t1 typo in calc1
	(``>1`` -> ``>>1``); the scaling of the other terms is left as posted.
	"""
	smax=2**(n+1)  # internal signals get one bit more than x0
	One=Signal(intbv(2**n-1,min=0,max=2**n)) #note: same width as x0 (2**n)
	Piquart=Signal(intbv(int(round((pi/4.0)*smax)),min=0,max=smax))  #note: this is half of pihalf
	Inv24=Signal(intbv(int(round((1/24.0)*smax)),min=0,max=smax))
	Inv720=Signal(intbv(int(round((1/720.0)*smax)),min=0,max=smax))
	# Scaled angle and its even powers.
	X=Signal(intbv(0,min=0,max=smax))
	X2=Signal(intbv(0,min=0,max=smax))
	X4=Signal(intbv(0,min=0,max=smax))
	X6=Signal(intbv(0,min=0,max=smax))
	# The three Taylor terms combined in calcsum.
	t1=Signal(intbv(0,min=0,max=smax))
	t2=Signal(intbv(0,min=0,max=smax))
	t3=Signal(intbv(0,min=0,max=smax))
	# Intended as the full-resolution intermediate. Every generator below
	# assigns ``im1=...``, which binds a new local name there, so this
	# object is never actually read or written by them.
	im1=intbv(0,min=0,max=2**(2*n)) #intermediate results with full resolution

	'''
	@always_comb
	def probe():
		y10.next=y0 #Piquart
	'''

	@always(x0,sector)
	def normalize():
		# Sectors 0 and 2 use the angle as is, sectors 1 and 3 mirror it.
		if sector==0 or sector==2:
			#X.next=(x0*Piquart)>>n        #>>n instead of (n+1) for doubling Piquart to pihalf
			im1=x0*Piquart
			X.next=im1>>n
		else:
			#X.next=((One-x0)*Piquart)>>n
			im1=(One-x0)*Piquart
			X.next=im1>>n

	@always_comb
	def calc0():
		im1=X*X
		X2.next=im1>>n

	@always_comb
	def calc1():
		# Was ``X2>1``: a comparison that yields 0 or 1, not a halving.
		t1.next= X2>>1          #half x**2
		im1=X2*X2
		X4.next=im1>>n    #compensate *4

	@always_comb
	def calc2():
		im1=X4*Inv24
		t2.next=im1>>n
		im1=X4*X2
		X6.next=im1>>n

	@always_comb
	def calc3():
		im1=X6*Inv720
		t3.next=im1>>n

	@always_comb
	def calcsum():
		# Sectors 0 and 3 output the series as is, sectors 1 and 2 negate it.
		if sector==0 or sector==3:
			y0.next = (One-t1+t2-t3)>>1
		else:
			y0.next = (t1-One-t2+t3)>>1

	return instances()

The calculation of a sin and a cos in real time captured with my raspberry pi looks as follows:

Figure_1

(If somebody is interested in the python routine capturing the data from spi, i’ll be glad to share here)

josyb · February 17, 2021, 4:15pm

Although this is a working work-around, you lose quite some precision.
I suggest this:

# Fragment: meant to sit inside the @block function, next to x0, piquart,
# One, sector, X and n.

# Full-width product register: as many bits as both operands together, so
# the multiplication result is not truncated before it is scaled.
XL = Signal(intbv(0)[len(x0) + len(piquart):])

@always_comb
def mulpiq():
    # Sectors 0 and 2 use the angle as is, the others use the mirrored angle.
    if sector == 0 or sector == 2:
        XL.next = x0 * piquart
    else:
        XL.next = (One-x0) * piquart

@always_comb
def normalize():
    # Drop the n low-order bits of the product (same effect as XL >> n).
    X.next = XL[:n]

Using sig[:n] is IMO better than sig >> n – they are equivalent

Regards,
Josy

stekar · February 17, 2021, 8:06pm

Good!
This is also easier to parameterize in n (which I just have a hard time with)…

stekar · February 17, 2021, 8:14pm

Uhhh… sorry - I just realize that this code is not working - i was in error and the display belongs to the first version of code which is not parameterized in n.

A working cos unit is to follow

stekar · February 18, 2021, 10:47am

Here is my cos unit working with the multiplication method outlined by Josy and DrPi.

Debugging in Hardware with the spi interface helped me to determine the terms that overflow and adapt the shifting and wordlength of intermediary data.
(I found it was not easy to predict these values and some trial and error was involved.)

It now works nicely on a tinyfpga, tested from 7 to 16 bits called in a 8kHz control loop.
My results display a 50Hz sin and cos with full and half amplitude, sampled at 8kHz with 16bit spi 3.2MHz ie.the maximum the raspberry pi can do.
The matplotlib figure contains 2 * 313 samples at 128us resulting in 40ms total time.

Figure_1

#unit_cos6.py 18.02.21  sk
#taylor series for cos: 1- 1/2*x0**2 + 1/24*x0**4 -1/720*x0**6
#tested ok with testcase_unit.py > improved resolution
#inputs sector (0..3),  angle x0 (0..2**n)
#output y0 = cos(x0)
from myhdl import *
from math import pi

@block
def unit_cos6(sector,x0,y0,n):
	"""Cosine approximation using explicitly sized product signals.

	Taylor series: 1 - 1/2*x0**2 + 1/24*x0**4 - 1/720*x0**6.
	Every multiplication writes into its own helper signal (XL1..XL6) whose
	width is the sum of the operand widths; a separate generator then slices
	off the low-order bits to scale the result.

	sector -- quadrant selector 0..3
	x0     -- angle inside the sector, 0..2**n
	y0     -- cosine output
	n      -- word length in bits
	"""
	# Fixed-point constants, n bits wide.
	One    = Signal(intbv(int(round(1.0*2**n)-1),min=0,max=2**n))  #note: this is 1
	Piquart= Signal(intbv(int(round((pi/4.0)*2**n)),min=0,max=2**n))  #note: this is half of pihalf
	Inv24  = Signal(intbv(int(round((1/24.0)*2**n)),min=0,max=2**n))
	Inv720 = Signal(intbv(int(round((1/720.0)*2**n)),min=0,max=2**n))
	# Scaled angle, its even powers and the three Taylor terms, n+1 bits wide.
	X  = Signal(intbv(0,min=0,max=2**(n+1)))
	X2 = Signal(intbv(0,min=0,max=2**(n+1)))
	X4 = Signal(intbv(0,min=0,max=2**(n+1)))
	X6 = Signal(intbv(0,min=0,max=2**(n+1)))
	t1 = Signal(intbv(0,min=0,max=2**(n+1)))
	t2 = Signal(intbv(0,min=0,max=2**(n+1)))
	t3 = Signal(intbv(0,min=0,max=2**(n+1)))
	#helpers: one full-width product signal per multiplication
	XL1 = Signal(intbv(0)[len(x0)+len(Piquart):])
	XL2 = Signal(intbv(0)[len(X)+len(X):])
	XL3 = Signal(intbv(0)[len(X2)+len(X2):])
	XL4 = Signal(intbv(0)[len(X4)+len(Inv24):])
	XL5 = Signal(intbv(0)[len(X4)+len(X2):])
	XL6 = Signal(intbv(0)[len(X6)+len(Inv720):])

	@always(x0,sector)
	def normalize():
		# Sectors 0 and 2 use the angle as is, sectors 1 and 3 mirror it.
		if sector==0 or sector==2:
			XL1.next=x0*Piquart
		else:
			XL1.next=(One-x0)*Piquart

	@always_comb
	def stripX():
		# Dropping n-1 bits instead of n doubles the value.
		X.next = XL1[:n-1]   # 2 * X/2 to compensate for piquart/pihalf

	@always_comb
	def calc0():
		XL2.next=X*X

	@always_comb
	def stripX2():
		# Dropping n+1 bits instead of n halves the value.
		X2.next = XL2[:n+1]   # 1/2 X**2
		t1.next = XL2[:n+1]   # 1/2 X**2

	@always_comb
	def calc1():
		XL3.next=X2*X2

	@always_comb
	def stripX4():
		X4.next=XL3[:n]    #1/4 X**4 ( 1/2 X**4 results in overflow!)

	@always_comb
	def calc2():
		XL4.next=X4*Inv24

	@always_comb
	def stript2():
		# Dropping n-2 bits multiplies by 4, undoing the 1/4 carried by X4.
		t2.next=XL4[:n-2]   #*4
		XL5.next=X4*X2		# 1/8 X**6

	@always_comb
	def calc3():
		X6.next=XL5[:n]	#*1

	@always_comb
	def calc4():
		XL6.next=X6*Inv720

	@always_comb
	def stript3():
		# Dropping n-3 bits multiplies by 8, undoing the 1/8 carried by X6.
		t3.next=XL6[:n-3]   #*8 ?

	@always_comb
	def calcsum():
		# Sectors 0 and 3 output the series as is, sectors 1 and 2 negate it.
		if sector==0 or sector==3:
			#y0.next = (One-t1+t2-t3)>>1 #debug: look at one term at a time
			y0.next = (One-t1+t2-t3)>>1
		else:
			#y0.next = (t1-One-t2+t3)>>1
			y0.next = (t1-One-t2+t3)>>1

	return instances()

reyaks · June 13, 2021, 1:17pm

Hello, much thanks for the code, could you please also share the code which is generating the vhdl out of that?

Topic		Replies	Views
From myHDL to syntesis Support	8	1036	August 7, 2018
Best practice: my conclusion after months of development Showcase	2	569	November 12, 2022
Initialisation behaviour can be problematic Showcase	6	474	November 19, 2022
Biquad filter produces garbage (solved) Support	6	964	April 5, 2018
New to MyHDL - how should I connect outputs to inputs properly? Support	3	33	January 24, 2025

Result of multiplication is zero. Not so in simulation

Related topics