1                                  ; horner_64.asm  Horners method of evaluating polynomials
     2                                  ;
     3                                  ; given a polynomial  Y = a_n X^n + a_n-1 X^n-1 + ... a_1 X + a_0
     4                                  ; a_n is the coefficient 'a' with subscript n. X^n is X to nth power
     5                                  ; compute y_1 = a_n * X + a_n-1
     6                                  ; compute y_2 = y_1 * X + a_n-2
     7                                  ; compute y_i = y_i-1 * X + a_n-i   i=3..n
     8                                  ; thus    y_n = Y = value of polynomial 
     9                                  ;
    10                                  ; in assembly language:
    11                                  ;   load some register with a_n, multiply by X
    12                                  ;   add a_n-1, multiply by X, add a_n-2, multiply by X, ...
    13                                  ;   finishing with the add  a_0
    14                                  ;
    15                                  ; output from execution:
    16                                  ; a  6319
    17                                  ; aa 6319
    18                                  ; af 6.319000e+03
    19                                  
    20                                  	extern	printf
    21                                  	section	.data
    22                                  	global	main
    23                                  
    24                                  	section	.data
    25 00000000 612020256C640A00        fmta:	db	"a  %ld",10,0
    26 00000008 616120256C640A00        fmtaa:	db	"aa %ld",10,0
    27 00000010 61662025650A00          fmtflt:	db	"af %e",10,0
    28                                  
    29                                  	section	.text
    30 00000000 55                      main:	push	rbp		; set up stack
    31                                  
    32                                  ; evaluate an integer polynomial, X=7, using a count
    33                                  
    34                                  	section	.data
    35 00000017 020000000000000005-     a:	dq	2,5,-7,22,-9	; coefficients of polynomial, a_n first
    36 00000020 00000000000000F9FF-
    37 00000029 FFFFFFFFFFFF160000-
    38 00000032 0000000000F7FFFFFF-
    39 0000003B FFFFFFFF           
    40 0000003F 0700000000000000        X:	dq	7		; X = 7
    41                                  				; n=4, 8 bytes per coefficient
    42                                  	section	.text
    43 00000001 488B0425[17000000]      	mov	rax,[a]		; accumulate value here, get coefficient a_n
    44 00000009 BF01000000              	mov	rdi,1		; subscript initialization
    45 0000000E B904000000              	mov	rcx,4		; loop iteration count initialization, n
    46 00000013 480FAF0425-             h3loop:	imul	rax,[X]		; * X     (ignore edx)
    47 00000018 [3F000000]         
    48 0000001C 480304FD[17000000]      	add	rax,[a+8*rdi]	; + a_n-i
    49 00000024 48FFC7                  	inc	rdi		; increment subscript
    50 00000027 E2EA                    	loop	h3loop		; decrement rcx, jump on non zero
    51                                  
    52 00000029 4889C6                  	mov	rsi, rax	; print rax
    53 0000002C 48BF-                   	mov	rdi, fmta	; format
    54 0000002E [0000000000000000] 
    55 00000036 B800000000              	mov	rax, 0		; no float
    56 0000003B E8(00000000)            	call	printf
    57                                  
    58                                  
    59                                  ; evaluate an integer polynomial, X=7, using a count as index
    60                                  ; optimal organization of data allows a three instruction loop
    61                                  	
    62                                  	section	.data
    63 00000047 F7FFFFFFFFFFFFFF16-     aa:	dq	-9,22,-7,5,2	; coefficients of polynomial, a_0 first
    64 00000050 00000000000000F9FF-
    65 00000059 FFFFFFFFFFFF050000-
    66 00000062 000000000002000000-
    67 0000006B 00000000           
    68 0000006F 0400000000000000        n:	dq	4		; n=4, 8 bytes per coefficient
    69                                  	section	.text
    70 00000040 488B0425[67000000]      	mov	rax,[aa+4*8]	; accumulate value here, get coefficient a_n
    71 00000048 488B0C25[6F000000]      	mov	rcx,[n]		; loop iteration count initialization, n
    72 00000050 480FAF0425-             h4loop:	imul	rax,[X]		; * X     (ignore edx)
    73 00000055 [3F000000]         
    74 00000059 480304CD[3F000000]      	add	rax,[aa+8*rcx-8]; + aa_n-i
    75 00000061 E2ED                    	loop	h4loop		; decrement rcx, jump on non zero
    76                                  
    77 00000063 4889C6                  	mov	rsi, rax	; print rax
    78 00000066 48BF-                   	mov	rdi, fmtaa	; format
    79 00000068 [0800000000000000] 
    80 00000070 B800000000              	mov	rax, 0		; no float
    81 00000075 E8(00000000)            	call	printf
    82                                  
    83                                  ; evaluate a double floating polynomial, X=7.0, using a count as index
    84                                  ; optimal organization of data allows a three instruction loop
    85                                  	
    86                                  	section	.data
    87 00000077 00000000000022C000-     af:	dq	-9.0,22.0,-7.0,5.0,2.0	; coefficients of polynomial, a_0 first
    88 00000080 000000000036400000-
    89 00000089 000000001CC0000000-
    90 00000092 000000144000000000-
    91 0000009B 00000040           
    92 0000009F 0000000000001C40        XF:	dq	7.0
    93 000000A7 0000000000000000        Y:	dq	0.0
    94 000000AF 04000000                N:	dd	4
    95                                  
    96                                  	section	.text
    97 0000007A 488B0C25[AF000000]      	mov	rcx,[N]		; loop iteration count initialization, n
    98 00000082 DD04CD[77000000]        	fld	qword [af+8*rcx]; accumulate value here, get coefficient a_n
    99 00000089 DC0C25[9F000000]        h5loop:	fmul	qword [XF]	; * XF
   100 00000090 DC04CD[6F000000]        	fadd	qword [af+8*rcx-8] ; + aa_n-i
   101 00000097 E2F0                    	loop	h5loop		; decrement rcx, jump on non zero
   102                                  
   103 00000099 DD1C25[A7000000]        	fstp	qword [Y]	; store Y in order to print Y
   104 000000A0 F30F7E0425-             	movq	xmm0, qword [Y]	; well, may just mov reg
   105 000000A5 [A7000000]         
   106 000000A9 48BF-                   	mov	rdi, fmtflt	; format
   107 000000AB [1000000000000000] 
   108 000000B3 B801000000              	mov	rax, 1		; one float
   109 000000B8 E8(00000000)            	call	printf
   110                                  
   111 000000BD 5D                      	pop	rbp		; restore stack
   112 000000BE B800000000              	mov	rax,0		; normal return
   113 000000C3 C3                      	ret			; return