Plaster
New
List
Login
common-lisp
default
anonymous
2025.12.27 13:25:46
#-sbcl (error "SBCL only!")

;; The SB-SIMD-AVX: symbols below are resolved by the reader, so the
;; contrib has to be loaded at compile time too, not only at load time.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (require :sb-simd))

;; adapted from leibniz_avx2.cpp.
(defun leibniz (rounds)
  "Approximate pi with the Leibniz series.

ROUNDS is a non-negative fixnum: the number of series terms added after
the leading 1, i.e. the result is

    4 * (1 + sum_{i=2}^{ROUNDS+1} (-1)^(i+1) / (2i - 1))

Returns a double-float.  The bulk of the sum is computed four terms at a
time with AVX; the remaining 0-3 terms are added by a scalar loop."
  (declare (optimize speed)
           (type (and fixnum unsigned-byte) rounds))
  (let* ((unroll 4)
         ;; Exclusive upper bound of the term index i (i starts at 2).
         (end (+ rounds 2))
         ;; First index NOT handled by the vector loop.  The vector loop
         ;; starts at 2 and steps by UNROLL, so it must stop at the largest
         ;; value of the form 2 + 4k that is <= END; otherwise its last
         ;; iteration would add terms that lie beyond END or that the
         ;; wind-down loop adds again.
         (vec-end (- end (mod (- end 2) unroll)))
         ;; Signs for the four lanes i, i+1, i+2, i+3 (i is always even).
         (x (sb-simd-avx:make-f64.4 -1d0 1d0 -1d0 1d0))
         ;; Term indices currently held in the four lanes.
         (ivec (sb-simd-avx:make-f64.4 2d0 3d0 4d0 5d0))
         ;; Per-lane partial sums.
         (pivec (sb-simd-avx:f64.4 0d0)))
    (loop for i from 2 below vec-end by unroll
          do ;; compute den = (2 * i - 1)
             (let ((den (sb-simd-avx:f64.4- (sb-simd-avx:f64.4* 2d0 ivec) 1d0)))
               (setf
                ;; increment ivec, so ivec += unroll
                ivec (sb-simd-avx:f64.4+ ivec 4d0)
                ;; compute partial sums
                pivec (sb-simd-avx:f64.4+ pivec (sb-simd-avx:f64.4/ x den)))))
    ;; gather the partial sums
    (let ((underscore-pi (+ 1d0 (sb-simd-avx:f64.4-horizontal+ pivec)))
          ;; VEC-END is even, so after the first negation the sign is -1,
          ;; which is the sign of every even-indexed term.
          (underscore-x 1d0))
      (declare (double-float underscore-pi underscore-x))
      ;; now the wind-down loop; all literals are doubles so that no term
      ;; is rounded through single precision.
      (loop for i from vec-end below end
            do (setf underscore-x (- underscore-x)
                     underscore-pi (+ underscore-pi
                                      (/ underscore-x (- (* 2d0 i) 1d0)))))
      (* underscore-pi 4d0))))

;; Script entry point (skipped when loaded from SLIME/SWANK): read the
;; number of rounds from the first line of rounds.txt and print the result.
#-swank
(with-open-file (in "rounds.txt")
  ;; Binding the default float format to DOUBLE-FLOAT makes PRINC print the
  ;; result without a "d0" exponent marker.
  (let ((*read-default-float-format* 'double-float)
        (n (parse-integer (read-line in))))
    (princ (leibniz n))
    (fresh-line)))

;;; The hot loop BTW:
;;; 050: L0:   C5FC1015E8FEFFFF VMOVUPS YMM2, [RIP-280]   ; [#x120C5E9F40]
;;; 058:       C5ED59D0         VMULPD YMM2, YMM2, YMM0
;;; 05C:       C5FC101DFCFEFFFF VMOVUPS YMM3, [RIP-260]   ; [#x120C5E9F60]
;;; 064:       C5ED58D3         VADDPD YMM2, YMM2, YMM3
;;; 068:       C5FC101D10FFFFFF VMOVUPS YMM3, [RIP-240]   ; [#x120C5E9F80]
;;; 070:       C5FD58C3         VADDPD YMM0, YMM0, YMM3
;;; 074:       C5FC101D24FFFFFF VMOVUPS YMM3, [RIP-220]   ; [#x120C5E9FA0]
;;; 07C:       C5E55ED2         VDIVPD YMM2, YMM3, YMM2
;;; 080:       C5F558CA         VADDPD YMM1, YMM1, YMM2
;;; 084:       4883C104         ADD RCX, 4
;;; 088: L1:   4839D9           CMP RCX, RBX
;;; 08B:       7CC3             JL L0
;;;
;;; GCC rather generates
;;; 1180: c5 f5 58 c1    vaddpd ymm0,ymm1,ymm1
;;; 1184: 83 c2 04       add    edx,0x4
;;; 1187: c5 f5 58 cc    vaddpd ymm1,ymm1,ymm4
;;; 118b: c5 fd 58 c5    vaddpd ymm0,ymm0,ymm5
;;; 118f: c5 e5 5e c0    vdivpd ymm0,ymm3,ymm0
;;; 1193: c5 ed 58 d0    vaddpd ymm2,ymm2,ymm0
;;; 1197: 39 c2          cmp    edx,eax
;;; 1199: 72 e5          jb     1180 <main+0xa0>
;;; loading the vectors outside of the loop. Otherwise that's all pretty same-y.
Raw
Annotate
Repaste
Edit