diff --git a/IMPLEMENTATION.md b/IMPLEMENTATION.md index 3613777b9..9d7f74b0f 100644 --- a/IMPLEMENTATION.md +++ b/IMPLEMENTATION.md @@ -350,7 +350,7 @@ type word indicates a vector. The fixnum value is the vector's length in words/objects, but shifted up by 1 bit, and then the low bit is set to 1 for an immutable vector. -Most kinds of Scheme values are represented records, so the layout is +Most kinds of Scheme values are represented as records, so the layout is defined by `define-record-type` and similar. For the primitive object types that are not records (and even a few that are), the layouts are defined in "cmacros.ss". For example, an `exactnum` (i.e., a complex @@ -367,7 +367,7 @@ The `type-typed-object` in the first line indicates that an `exactnum` is represented by a pointer that is tagged with `type-typed-object`, and so we should expect the first field to be a type word. That's why the first field above is `type`, and it turns out that it will always -contain the value `type-inexactnum`. The `iptr` type for `type` means +contain the value `type-exactnum`. The `iptr` type for `type` means "a pointer-sized signed integer". The `ptr` type for `real` and `imag` means "pointer" or "Scheme object". diff --git a/c/segment.c b/c/segment.c index 1da58bfe9..241cf92fb 100644 --- a/c/segment.c +++ b/c/segment.c @@ -584,7 +584,7 @@ static void contract_segment_table(uptr base, uptr end) { } /* Bracket all writes to `space_code` memory with calls to - `S_thread_start_code_write` and `S_thread_start_code_write'. + `S_thread_start_code_write` and `S_thread_end_code_write'. On a platform where a page cannot be both writable and executable at the same time (a.k.a. W^X), AND assuming that the disposition is diff --git a/s/arm32.ss b/s/arm32.ss index 5c0ae0242..966929d8f 100644 --- a/s/arm32.ss +++ b/s/arm32.ss @@ -580,11 +580,11 @@ (define-instruction value (fpcastto/hi) [(op (x ur) (y fpmem)) `(set! ,(make-live-info) ,x ,(fpmem->mem y 'hi))] [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpcastto 'hi) ,y))]) - + (define-instruction value (fpcastto/lo) [(op (x ur) (y fpmem)) `(set! ,(make-live-info) ,x ,(fpmem->mem y 'lo))] [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpcastto 'lo) ,y))]) - + (define-instruction value (fpcastfrom) [(op (x fpmem) (hi ur) (lo ur)) (seq `(set! ,(make-live-info) ,(fpmem->mem x 'lo) ,lo) @@ -677,7 +677,7 @@ `(asm ,info ,asm-unactivate-thread ,x ,u ,ulr)))]) (define-instruction value (asmlibcall) - [(op (z ur)) + [(op (z ur)) (let ([u (make-tmp 'u)]) (if (info-asmlib-save-ra? info) (seq @@ -795,11 +795,11 @@ [(op (x ur) (y ur) (w imm-constant) (old ur) (new ur)) (lea->reg x y w (lambda (r) - (let ([u1 (make-tmp 'u1)] [u2 (make-tmp 'u2)]) + (let ([u1 (make-tmp 'u1)] [u2 (make-tmp 'u2)]) (seq `(set! ,(make-live-info) ,u1 (asm ,null-info ,asm-kill)) `(set! ,(make-live-info) ,u2 (asm ,null-info ,asm-kill)) - `(asm ,info ,asm-cas ,r ,old ,new ,u1 ,u2)))))])) + `(asm ,info ,asm-cas ,r ,old ,new ,u1 ,u2)))))])) (define-instruction effect (store-store-fence) [(op) @@ -808,7 +808,7 @@ (define-instruction effect (acquire-fence) [(op) `(asm ,info ,(asm-fence 'acquire))]) - + (define-instruction effect (release-fence) [(op) `(asm ,info ,(asm-fence 'release))]) @@ -1259,8 +1259,8 @@ (define dmb-op (lambda (op opcode code*) (emit-code (op code*) - [4 #b1111010101111111111100000101] - [0 opcode]))) + [4 #b1111010101111111111100000101] + [0 opcode]))) (define branch-imm-op (lambda (op cond-bits disp code*) @@ -1516,7 +1516,7 @@ [8 (fxsrl n 4)] [4 opcode2] [0 (fxlogand n #xf)])))) - + (define load-lit-op (lambda (op dest-ea orig-disp code*) (let-values ([(U disp) (if (fx< orig-disp 0) (values 0 (fx- orig-disp)) (values 1 orig-disp))]) @@ -1624,7 +1624,7 @@ [(_ (op opnd ... ?code*) chunk ...) (build-maybe-cons* #'((build long (byte-fields chunk ...))) #'(aop-cons* `(asm ,op ,opnd ...) ?code*))]))) - + #;(define-syntax emit-code (lambda (x) (syntax-case x () @@ -1708,11 +1708,11 @@ (define branch-disp? (lambda (x) - (and (fixnum? x) + (and (fixnum? x) ; -4 accounts for fact that pc reads as the instruction after next, not next (fx<= (- (expt 2 25)) (fx- x 4) (- (expt 2 25) 1)) (not (fxlogtest x #b11))))) - + (define asm-size (lambda (x) (case (car x) @@ -2133,7 +2133,7 @@ (rec asm-c-simple-call-internal (lambda (code* jmp-tmp . ignore) (asm-helper-call code* target save-ra? jmp-tmp)))))) - + (define-who asm-indirect-call (lambda (code* dest lr . ignore) (safe-assert (eq? lr %lr)) @@ -2368,7 +2368,7 @@ (emit mvn dest src code*)))) (define asm-enter values) - + (define-who asm-inc-cc-counter (lambda (code* addr val tmp) (Trivit (addr val tmp) @@ -2387,36 +2387,37 @@ (lambda (code*) (emit addi #f tmp tmp 1 code*)) code*)))))) - + (module (asm-foreign-call asm-foreign-callable) (define align (lambda (b x) (let ([k (- b 1)]) (fxlogand (fx+ x k) (fxlognot k))))) - (define (double-member? m) (and (eq? (car m) 'float) - (fx= (cadr m) 8))) + (define (double-member? m) + (and (eq? (car m) 'float) + (fx= (cadr m) 8))) (define (float-member? m) (and (eq? (car m) 'float) - (fx= (cadr m) 4))) + (fx= (cadr m) 4))) (define (indirect-result-that-fits-in-registers? result-type) (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) - (let* ([members ($ftd->members ftd)] - [num-members (length members)]) - (or (fx<= ($ftd-size ftd) 4) - (and (fx= num-members 1) - ;; a struct containing only int64 is not returned in a register - (or (not ($ftd-compound? ftd)))) - (and (fx<= num-members 4) - (or (andmap double-member? members) - (andmap float-member? members)))))] - [else #f])) + (let* ([members ($ftd->members ftd)] + [num-members (length members)]) + (or (fx<= ($ftd-size ftd) 4) + (and (fx= num-members 1) + ;; a struct containing only int64 is not returned in a register + (or (not ($ftd-compound? ftd)))) + (and (fx<= num-members 4) + (or (andmap double-member? members) + (andmap float-member? members)))))] + [else #f])) (define num-int-regs 4) ; number of integer registers normally usd by the ABI (define num-dbl-regs 8) ; number of `double` registers normally usd by the ABI (define sgl-regs (lambda () (list %Cfparg1 %Cfparg1b %Cfparg2 %Cfparg2b %Cfparg3 %Cfparg3b %Cfparg4 %Cfparg4b - %Cfparg5 %Cfparg5b %Cfparg6 %Cfparg6b %Cfparg7 %Cfparg7b %Cfparg8 %Cfparg8b))) + %Cfparg5 %Cfparg5b %Cfparg6 %Cfparg6b %Cfparg7 %Cfparg7b %Cfparg8 %Cfparg8b))) (define save-and-restore (lambda (regs e) (safe-assert (andmap reg? regs)) - (with-output-language (L13 Effect) + (with-output-language (L13 Effect) (let ([save-and-restore-gp - (lambda (regs e) + (lambda (regs e) (let* ([regs (filter (lambda (r) (not (eq? (reg-type r) 'fp))) regs)] [regs (if (fxodd? (length regs)) (cons %tc regs) ; keep doubleword aligned @@ -2425,125 +2426,128 @@ [(null? regs) e] [else (%seq - (inline ,(make-info-kill*-live* '() regs) ,%push-multiple) - ,e - (inline ,(make-info-kill*-live* regs '()) ,%pop-multiple))])))] - [save-and-restore-fp - (lambda (regs e) + (inline ,(make-info-kill*-live* '() regs) ,%push-multiple) + ,e + (inline ,(make-info-kill*-live* regs '()) ,%pop-multiple))])))] + [save-and-restore-fp + (lambda (regs e) (let ([fp-regs (filter (lambda (r) (eq? (reg-type r) 'fp)) regs)]) (cond [(null? fp-regs) e] [else (let ([info (make-info-vpush (car fp-regs) (length fp-regs))]) (%seq - (inline ,info ,%vpush-multiple) - ,e - (inline ,info ,%vpop-multiple)))])))]) + (inline ,info ,%vpush-multiple) + ,e + (inline ,info ,%vpop-multiple)))])))]) (save-and-restore-gp regs (save-and-restore-fp regs e)))))) (define-who asm-foreign-call (with-output-language (L13 Effect) (define int-regs (lambda () (list %Carg1 %Carg2 %Carg3 %Carg4))) (letrec ([load-double-stack - (lambda (offset) - (lambda (x) ; unboxed + (lambda (offset) + (lambda (x) ; unboxed `(set! ,(%mref ,%sp ,%zero ,offset fp) ,x)))] [load-single-stack - (lambda (offset) - (lambda (x) ; unboxed + (lambda (offset) + (lambda (x) ; unboxed (%inline store-double->single ,(%mref ,%sp ,%zero ,offset fp) ,x)))] [load-int-stack - (lambda (offset) - (lambda (rhs) ; requires rhs - `(set! ,(%mref ,%sp ,offset) ,rhs)))] + (lambda (offset) + (lambda (rhs) ; requires rhs + `(set! ,(%mref ,%sp ,offset) ,rhs)))] [load-int64-stack - (lambda (offset) - (lambda (lorhs hirhs) ; requires rhs - (%seq - (set! ,(%mref ,%sp ,offset) ,lorhs) - (set! ,(%mref ,%sp ,(fx+ offset 4)) ,hirhs))))] + (lambda (offset) + (lambda (lorhs hirhs) ; requires rhs + (%seq + (set! ,(%mref ,%sp ,offset) ,lorhs) + (set! ,(%mref ,%sp ,(fx+ offset 4)) ,hirhs))))] [load-int-indirect-stack - (lambda (offset from-offset size unsigned?) - (lambda (x) ; requires var - (case size - [(3) - (%seq - (set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,from-offset))) - (set! ,(%mref ,%sp ,(fx+ offset 2)) (inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,(fx+ from-offset 2)))))] - [else - `(set! ,(%mref ,%sp ,offset) ,(case size - [(1) `(inline ,(make-info-load (if unsigned? 'unsigned-8 'integer-8) #f) ,%load ,x ,%zero (immediate ,from-offset))] - [(2) `(inline ,(make-info-load (if unsigned? 'unsigned-16 'integer-16) #f) ,%load ,x ,%zero (immediate ,from-offset))] - [(4) (%mref ,x ,from-offset)]))])))] + (lambda (offset from-offset size unsigned?) + (lambda (x) ; requires var + (case size + [(3) + (%seq + (set! ,(%mref ,%sp ,offset) + (inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,from-offset))) + (set! ,(%mref ,%sp ,(fx+ offset 2)) + (inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,(fx+ from-offset 2)))))] + [else + `(set! ,(%mref ,%sp ,offset) + ,(case size + [(1) `(inline ,(make-info-load (if unsigned? 'unsigned-8 'integer-8) #f) ,%load ,x ,%zero (immediate ,from-offset))] + [(2) `(inline ,(make-info-load (if unsigned? 'unsigned-16 'integer-16) #f) ,%load ,x ,%zero (immediate ,from-offset))] + [(4) (%mref ,x ,from-offset)]))])))] [load-int64-indirect-stack - (lambda (offset from-offset) - (lambda (x) ; requires var - (%seq - (set! ,(%mref ,%sp ,offset) ,(%mref ,x ,from-offset)) - (set! ,(%mref ,%sp ,(fx+ offset 4)) ,(%mref ,x ,(fx+ from-offset 4))))))] + (lambda (offset from-offset) + (lambda (x) ; requires var + (%seq + (set! ,(%mref ,%sp ,offset) ,(%mref ,x ,from-offset)) + (set! ,(%mref ,%sp ,(fx+ offset 4)) ,(%mref ,x ,(fx+ from-offset 4))))))] [load-double-reg - (lambda (fpreg) - (lambda (x) ; unboxed - `(set! ,fpreg ,x)))] + (lambda (fpreg) + (lambda (x) ; unboxed + `(set! ,fpreg ,x)))] [load-single-reg - (lambda (fpreg single?) - (lambda (x) ; unboxed - (let ([%op (if single? %load-single %double->single)]) - `(set! ,fpreg (inline ,null-info ,%op ,x)))))] + (lambda (fpreg single?) + (lambda (x) ; unboxed + (let ([%op (if single? %load-single %double->single)]) + `(set! ,fpreg (inline ,null-info ,%op ,x)))))] [load-double-int-reg - (lambda (loreg hireg) - (lambda (x) ; unboxed - (%seq - (set! ,loreg ,(%inline fpcastto/lo ,x)) - (set! ,hireg ,(%inline fpcastto/hi ,x)))))] - [load-single-int-reg - (lambda (reg) - (lambda (x) ; unboxed - (%seq - ;; we can use `%Cfparg1` because this only happens - ;; when FP registers are not used for arguments - (set! ,%Cfparg1 ,(%inline double->single ,x)) - (set! ,reg ,(%inline fpcastto/lo ,%Cfparg1)))))] + (lambda (loreg hireg) + (lambda (x) ; unboxed + (%seq + (set! ,loreg ,(%inline fpcastto/lo ,x)) + (set! ,hireg ,(%inline fpcastto/hi ,x)))))] + [load-single-int-reg + (lambda (reg) + (lambda (x) ; unboxed + (%seq + ;; we can use `%Cfparg1` because this only happens + ;; when FP registers are not used for arguments + (set! ,%Cfparg1 ,(%inline double->single ,x)) + (set! ,reg ,(%inline fpcastto/lo ,%Cfparg1)))))] [load-boxed-double-reg - (lambda (fpreg fp-disp) - (lambda (x) ; address (always a var) of a flonum - `(set! ,fpreg ,(%mref ,x ,%zero ,fp-disp fp))))] + (lambda (fpreg fp-disp) + (lambda (x) ; address (always a var) of a flonum + `(set! ,fpreg ,(%mref ,x ,%zero ,fp-disp fp))))] [load-boxed-single-reg - (lambda (fpreg fp-disp single?) - (lambda (x) ; address (always a var) of a flonum - (let ([%op (if single? %load-single %double->single)]) - `(set! ,fpreg (inline ,null-info ,%op ,(%mref ,x ,%zero ,fp-disp fp))))))] + (lambda (fpreg fp-disp single?) + (lambda (x) ; address (always a var) of a flonum + (let ([%op (if single? %load-single %double->single)]) + `(set! ,fpreg (inline ,null-info ,%op ,(%mref ,x ,%zero ,fp-disp fp))))))] [load-int-reg - (lambda (ireg) - (lambda (x) - `(set! ,ireg ,x)))] + (lambda (ireg) + (lambda (x) + `(set! ,ireg ,x)))] [load-int64-reg - (lambda (loreg hireg) - (lambda (lo hi) - (%seq - (set! ,loreg ,lo) - (set! ,hireg ,hi))))] + (lambda (loreg hireg) + (lambda (lo hi) + (%seq + (set! ,loreg ,lo) + (set! ,hireg ,hi))))] [load-int-indirect-reg - (lambda (ireg from-offset size unsigned?) - (lambda (x) - (case size - [(3) - (let ([tmp %lr]) ; ok to use %lr here? - (%seq - (set! ,ireg (inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,from-offset))) - (set! ,tmp (inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,(fx+ from-offset 2)))) - (set! ,tmp ,(%inline sll ,tmp (immediate 16))) - (set! ,ireg ,(%inline + ,ireg ,tmp))))] - [else - `(set! ,ireg ,(case size - [(1) `(inline ,(make-info-load (if unsigned? 'unsigned-8 'integer-8) #f) ,%load ,x ,%zero (immediate ,from-offset))] - [(2) `(inline ,(make-info-load (if unsigned? 'unsigned-16 'integer-16) #f) ,%load ,x ,%zero (immediate ,from-offset))] - [(4) (%mref ,x ,from-offset)]))])))] + (lambda (ireg from-offset size unsigned?) + (lambda (x) + (case size + [(3) + (let ([tmp %lr]) ; ok to use %lr here? + (%seq + (set! ,ireg (inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,from-offset))) + (set! ,tmp (inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,(fx+ from-offset 2)))) + (set! ,tmp ,(%inline sll ,tmp (immediate 16))) + (set! ,ireg ,(%inline + ,ireg ,tmp))))] + [else + `(set! ,ireg ,(case size + [(1) `(inline ,(make-info-load (if unsigned? 'unsigned-8 'integer-8) #f) ,%load ,x ,%zero (immediate ,from-offset))] + [(2) `(inline ,(make-info-load (if unsigned? 'unsigned-16 'integer-16) #f) ,%load ,x ,%zero (immediate ,from-offset))] + [(4) (%mref ,x ,from-offset)]))])))] [load-int64-indirect-reg - (lambda (loreg hireg from-offset) - (lambda (x) - (%seq - (set! ,loreg ,(%mref ,x ,from-offset)) - (set! ,hireg ,(%mref ,x ,(fx+ from-offset 4))))))] + (lambda (loreg hireg from-offset) + (lambda (x) + (%seq + (set! ,loreg ,(%mref ,x ,from-offset)) + (set! ,hireg ,(%mref ,x ,(fx+ from-offset 4))))))] [do-args (lambda (types varargs?) ; sgl* is always of even-length, i.e., has a sgl/dbl reg first @@ -2554,190 +2558,190 @@ (nanopass-case (Ltype Type) (car types) [(fp-double-float) (cond - [(and varargs? - ;; For varargs, treat a double like a 64-bit integer - (let ([int* (if (even? (length int*)) int* (cdr int*))]) - (and (pair? int*) - int*))) - => (lambda (int*) - (loop (cdr types) - (cons (load-double-int-reg (car int*) (cadr int*)) locs) - (cons* (car int*) (cadr int*) live*) (cddr int*) sgl* bsgl isp))] - [(null? sgl*) - (let ([isp (align 8 isp)]) - (loop (cdr types) - (cons (load-double-stack isp) locs) - live* int* '() #f (fx+ isp 8)))] - [else - (loop (cdr types) - (cons (load-double-reg (car sgl*)) locs) - (cons (car sgl*) live*) int* (cddr sgl*) bsgl isp)])] + [(and varargs? + ;; For varargs, treat a double like a 64-bit integer + (let ([int* (if (even? (length int*)) int* (cdr int*))]) + (and (pair? int*) + int*))) + => (lambda (int*) + (loop (cdr types) + (cons (load-double-int-reg (car int*) (cadr int*)) locs) + (cons* (car int*) (cadr int*) live*) (cddr int*) sgl* bsgl isp))] + [(null? sgl*) + (let ([isp (align 8 isp)]) + (loop (cdr types) + (cons (load-double-stack isp) locs) + live* int* '() #f (fx+ isp 8)))] + [else + (loop (cdr types) + (cons (load-double-reg (car sgl*)) locs) + (cons (car sgl*) live*) int* (cddr sgl*) bsgl isp)])] [(fp-single-float) - (cond - [bsgl + (cond + [bsgl + (loop (cdr types) + (cons (load-single-reg bsgl #f) locs) + (cons bsgl live*) int* sgl* #f isp)] + [(and (not (null? sgl*)) + (not varargs?)) (loop (cdr types) - (cons (load-single-reg bsgl #f) locs) - (cons bsgl live*) int* sgl* #f isp)] - [(and (not (null? sgl*)) - (not varargs?)) + (cons (load-single-reg (car sgl*) #f) locs) + (cons (car sgl*) live*) int* (cddr sgl*) (cadr sgl*) isp)] + [(and varargs? + (not (null? int*))) (loop (cdr types) - (cons (load-single-reg (car sgl*) #f) locs) - (cons (car sgl*) live*) int* (cddr sgl*) (cadr sgl*) isp)] - [(and varargs? - (not (null? int*))) - (loop (cdr types) - (cons (load-single-int-reg (car int*)) locs) - (cons* (car int*) live*) (cdr int*) sgl* bsgl isp)] - [else + (cons (load-single-int-reg (car int*)) locs) + (cons* (car int*) live*) (cdr int*) sgl* bsgl isp)] + [else (loop (cdr types) - (cons (load-single-stack isp) locs) - live* int* '() #f (fx+ isp 4))])] - [(fp-ftd& ,ftd) - (let ([size ($ftd-size ftd)] - [members ($ftd->members ftd)] - [combine-loc (lambda (loc f) - (if loc - (lambda (x) (%seq ,(loc x) ,(f x))) - f))]) - (case ($ftd-alignment ftd) - [(8) - (let* ([int* (if (even? (length int*)) int* (cdr int*))] - [num-members (length members)] - [doubles? (and (not varargs?) + (cons (load-single-stack isp) locs) + live* int* '() #f (fx+ isp 4))])] + [(fp-ftd& ,ftd) + (let ([size ($ftd-size ftd)] + [members ($ftd->members ftd)] + [combine-loc (lambda (loc f) + (if loc + (lambda (x) (%seq ,(loc x) ,(f x))) + f))]) + (case ($ftd-alignment ftd) + [(8) + (let* ([int* (if (even? (length int*)) int* (cdr int*))] + [num-members (length members)] + [doubles? (and (not varargs?) (fx<= num-members 4) - (andmap double-member? members))]) - ;; Sequence of up to 4 doubles that fits in registers? - (cond - [(and doubles? - (fx>= (length sgl*) (fx* 2 num-members))) - ;; Allocate each double to a register - (let dbl-loop ([size size] [offset 0] [live* live*] [sgl* sgl*] [loc #f]) - (cond - [(fx= size 0) - (loop (cdr types) (cons loc locs) live* int* sgl* #f isp)] - [else - (dbl-loop (fx- size 8) (fx+ offset 8) (cons (car sgl*) live*) (cddr sgl*) - (combine-loc loc (load-boxed-double-reg (car sgl*) offset)))]))] - [else - ;; General case; for non-doubles, use integer registers while available, - ;; possibly splitting between registers and stack - (let obj-loop ([size size] [offset 0] [loc #f] - [live* live*] [int* int*] [isp isp]) - (cond - [(fx= size 0) - (loop (cdr types) (cons loc locs) live* int* sgl* bsgl isp)] - [else - (if (or (null? int*) doubles?) - (let ([isp (align 8 isp)]) - (obj-loop (fx- size 8) (fx+ offset 8) - (combine-loc loc (load-int64-indirect-stack isp offset)) - live* int* (fx+ isp 8))) - (obj-loop (fx- size 8) (fx+ offset 8) - (combine-loc loc (load-int64-indirect-reg (car int*) (cadr int*) offset)) - (cons* (car int*) (cadr int*) live*) (cddr int*) isp))]))]))] - [else - (let* ([num-members (length members)] - [floats? (and (not varargs?) - (fx<= num-members 4) - (andmap float-member? members))]) - ;; Sequence of up to 4 floats that fits in registers? - (cond - [(and floats? - (not varargs?) - (fx>= (fx+ (length sgl*) (if bsgl 1 0)) num-members)) - ;; Allocate each float to register - (let flt-loop ([size size] [offset 0] [sgl* sgl*] [bsgl bsgl] [loc #f] [live* live*]) - (cond - [(fx= size 0) - (loop (cdr types) (cons loc locs) live* int* sgl* bsgl isp)] - [else - (flt-loop (fx- size 4) (fx+ offset 4) - (if bsgl sgl* (cddr sgl*)) - (if bsgl #f (cadr sgl*)) - (combine-loc loc (load-boxed-single-reg (or bsgl (car sgl*)) offset #t)) - (cons (or bsgl (car sgl*)) live*))]))] - [else - ;; General case; use integer registers while available, - ;; possibly splitting between registers and stack - (let obj-loop ([size size] [offset 0] [loc #f] - [live* live*] [int* int*] [isp isp]) - (cond - [(fx<= size 0) - (loop (cdr types) (cons loc locs) live* int* sgl* bsgl isp)] - [else - (if (or (null? int*) floats?) - (obj-loop (fx- size 4) (fx+ offset 4) - (combine-loc loc (load-int-indirect-stack isp offset (fxmin size 4) ($ftd-unsigned? ftd))) - live* int* (fx+ isp 4)) - (obj-loop (fx- size 4) (fx+ offset 4) - (combine-loc loc (load-int-indirect-reg (car int*) offset (fxmin size 4) ($ftd-unsigned? ftd))) - (cons (car int*) live*) (cdr int*) isp))]))]))]))] - [else + (andmap double-member? members))]) + ;; Sequence of up to 4 doubles that fits in registers? + (cond + [(and doubles? + (fx>= (length sgl*) (fx* 2 num-members))) + ;; Allocate each double to a register + (let dbl-loop ([size size] [offset 0] [live* live*] [sgl* sgl*] [loc #f]) + (cond + [(fx= size 0) + (loop (cdr types) (cons loc locs) live* int* sgl* #f isp)] + [else + (dbl-loop (fx- size 8) (fx+ offset 8) (cons (car sgl*) live*) (cddr sgl*) + (combine-loc loc (load-boxed-double-reg (car sgl*) offset)))]))] + [else + ;; General case; for non-doubles, use integer registers while available, + ;; possibly splitting between registers and stack + (let obj-loop ([size size] [offset 0] [loc #f] + [live* live*] [int* int*] [isp isp]) + (cond + [(fx= size 0) + (loop (cdr types) (cons loc locs) live* int* sgl* bsgl isp)] + [else + (if (or (null? int*) doubles?) + (let ([isp (align 8 isp)]) + (obj-loop (fx- size 8) (fx+ offset 8) + (combine-loc loc (load-int64-indirect-stack isp offset)) + live* int* (fx+ isp 8))) + (obj-loop (fx- size 8) (fx+ offset 8) + (combine-loc loc (load-int64-indirect-reg (car int*) (cadr int*) offset)) + (cons* (car int*) (cadr int*) live*) (cddr int*) isp))]))]))] + [else + (let* ([num-members (length members)] + [floats? (and (not varargs?) + (fx<= num-members 4) + (andmap float-member? members))]) + ;; Sequence of up to 4 floats that fits in registers? + (cond + [(and floats? + (not varargs?) + (fx>= (fx+ (length sgl*) (if bsgl 1 0)) num-members)) + ;; Allocate each float to register + (let flt-loop ([size size] [offset 0] [sgl* sgl*] [bsgl bsgl] [loc #f] [live* live*]) + (cond + [(fx= size 0) + (loop (cdr types) (cons loc locs) live* int* sgl* bsgl isp)] + [else + (flt-loop (fx- size 4) (fx+ offset 4) + (if bsgl sgl* (cddr sgl*)) + (if bsgl #f (cadr sgl*)) + (combine-loc loc (load-boxed-single-reg (or bsgl (car sgl*)) offset #t)) + (cons (or bsgl (car sgl*)) live*))]))] + [else + ;; General case; use integer registers while available, + ;; possibly splitting between registers and stack + (let obj-loop ([size size] [offset 0] [loc #f] + [live* live*] [int* int*] [isp isp]) + (cond + [(fx<= size 0) + (loop (cdr types) (cons loc locs) live* int* sgl* bsgl isp)] + [else + (if (or (null? int*) floats?) + (obj-loop (fx- size 4) (fx+ offset 4) + (combine-loc loc (load-int-indirect-stack isp offset (fxmin size 4) ($ftd-unsigned? ftd))) + live* int* (fx+ isp 4)) + (obj-loop (fx- size 4) (fx+ offset 4) + (combine-loc loc (load-int-indirect-reg (car int*) offset (fxmin size 4) ($ftd-unsigned? ftd))) + (cons (car int*) live*) (cdr int*) isp))]))]))]))] + [else (if (nanopass-case (Ltype Type) (car types) - [(fp-integer ,bits) (fx= bits 64)] - [(fp-unsigned ,bits) (fx= bits 64)] - [else #f]) + [(fp-integer ,bits) (fx= bits 64)] + [(fp-unsigned ,bits) (fx= bits 64)] + [else #f]) (let ([int* (if (even? (length int*)) int* (cdr int*))]) (if (null? int*) (let ([isp (align 8 isp)]) (loop (cdr types) - (cons (load-int64-stack isp) locs) - live* '() sgl* bsgl (fx+ isp 8))) + (cons (load-int64-stack isp) locs) + live* '() sgl* bsgl (fx+ isp 8))) (loop (cdr types) - (cons (load-int64-reg (car int*) (cadr int*)) locs) - (cons* (car int*) (cadr int*) live*) (cddr int*) sgl* bsgl isp))) + (cons (load-int64-reg (car int*) (cadr int*)) locs) + (cons* (car int*) (cadr int*) live*) (cddr int*) sgl* bsgl isp))) (if (null? int*) (loop (cdr types) - (cons (load-int-stack isp) locs) - live* '() sgl* bsgl (fx+ isp 4)) + (cons (load-int-stack isp) locs) + live* '() sgl* bsgl (fx+ isp 4)) (loop (cdr types) - (cons (load-int-reg (car int*)) locs) - (cons (car int*) live*) (cdr int*) sgl* bsgl isp)))]))))] - [add-fill-result - (lambda (fill-result-here? result-type args-frame-size e) - (cond - [fill-result-here? - (nanopass-case (Ltype Type) result-type - [(fp-ftd& ,ftd) - (let* ([members ($ftd->members ftd)] - [num-members (length members)] - ;; result pointer is stashed on the stack after all arguments: - [dest-x %r2] - [init-dest-e `(seq ,e (set! ,dest-x ,(%mref ,%sp ,args-frame-size)))]) - (cond - [(and (fx<= num-members 4) - (or (andmap double-member? members) - (andmap float-member? members))) - ;; double/float results are in floating-point registers - (let ([double? (and (pair? members) (double-member? (car members)))]) - (let loop ([members members] [sgl* (sgl-regs)] [offset 0] [e init-dest-e]) - (cond - [(null? members) e] - [else - (loop (cdr members) - (if double? (cddr sgl*) (cdr sgl*)) - (fx+ offset (if double? 8 4)) - `(seq - ,e + (cons (load-int-reg (car int*)) locs) + (cons (car int*) live*) (cdr int*) sgl* bsgl isp)))]))))] + [add-fill-result + (lambda (fill-result-here? result-type args-frame-size e) + (cond + [fill-result-here? + (nanopass-case (Ltype Type) result-type + [(fp-ftd& ,ftd) + (let* ([members ($ftd->members ftd)] + [num-members (length members)] + ;; result pointer is stashed on the stack after all arguments: + [dest-x %r2] + [init-dest-e `(seq ,e (set! ,dest-x ,(%mref ,%sp ,args-frame-size)))]) + (cond + [(and (fx<= num-members 4) + (or (andmap double-member? members) + (andmap float-member? members))) + ;; double/float results are in floating-point registers + (let ([double? (and (pair? members) (double-member? (car members)))]) + (let loop ([members members] [sgl* (sgl-regs)] [offset 0] [e init-dest-e]) + (cond + [(null? members) e] + [else + (loop (cdr members) + (if double? (cddr sgl*) (cdr sgl*)) + (fx+ offset (if double? 8 4)) + `(seq + ,e ,(if double? `(set! ,(%mref ,dest-x ,%zero ,offset fp) ,(car sgl*)) (%inline store-single ,(%mref ,dest-x ,%zero ,offset fp) ,(car sgl*)))))])))] - [else - ;; result is in %Cretval and maybe %r1 - `(seq - ,init-dest-e - ,(case ($ftd-size ftd) - [(1) `(inline ,(make-info-load 'integer-8 #f) ,%store ,dest-x ,%zero (immediate 0) ,%Cretval)] - [(2) `(inline ,(make-info-load 'integer-16 #f) ,%store ,dest-x ,%zero (immediate 0) ,%Cretval)] - [(3) (%seq - (inline ,(make-info-load 'integer-16 #f) ,%store ,dest-x ,%zero (immediate 0) ,%Cretval) - (set! ,%Cretval ,(%inline srl ,%Cretval (immediate 16))) - (inline ,(make-info-load 'integer-8 #f) ,%store ,dest-x ,%zero (immediate 2) ,%Cretval))] - [(4) `(set! ,(%mref ,dest-x ,0) ,%Cretval)] - [(8) `(seq - (set! ,(%mref ,dest-x ,0) ,%Cretval) - (set! ,(%mref ,dest-x ,4) ,%r1))]))]))])] - [else e]))] + [else + ;; result is in %Cretval and maybe %r1 + `(seq + ,init-dest-e + ,(case ($ftd-size ftd) + [(1) `(inline ,(make-info-load 'integer-8 #f) ,%store ,dest-x ,%zero (immediate 0) ,%Cretval)] + [(2) `(inline ,(make-info-load 'integer-16 #f) ,%store ,dest-x ,%zero (immediate 0) ,%Cretval)] + [(3) (%seq + (inline ,(make-info-load 'integer-16 #f) ,%store ,dest-x ,%zero (immediate 0) ,%Cretval) + (set! ,%Cretval ,(%inline srl ,%Cretval (immediate 16))) + (inline ,(make-info-load 'integer-8 #f) ,%store ,dest-x ,%zero (immediate 2) ,%Cretval))] + [(4) `(set! ,(%mref ,dest-x ,0) ,%Cretval)] + [(8) `(seq + (set! ,(%mref ,dest-x ,0) ,%Cretval) + (set! ,(%mref ,dest-x ,4) ,%r1))]))]))])] + [else e]))] [get-result-regs (lambda (result-type varargs?) (nanopass-case (Ltype Type) result-type @@ -2757,113 +2761,112 @@ (case bits [(64) (list %r1 %Cretval)] [else (list %Cretval)])] - [(fp-ftd& ,ftd) - (let* ([members ($ftd->members ftd)] - [num-members (length members)]) - (cond - [(and (fx<= num-members 4) - (or (andmap double-member? members) - (andmap float-member? members))) - ;; double/float results are in floating-point registers - (let ([double? (and (pair? members) (double-member? (car members)))]) - (let loop ([members members] [sgl* (sgl-regs)]) - (cond - [(null? members) '()] - [double? - (cons (car sgl*) (loop (cdr members) (cddr sgl*)))] - [else - (cons (car sgl*) (if (null? (cdr members)) - '() - (loop (cddr members) (cddr sgl*))))])))] - [else - ;; result is in %Cretval and maybe %r1 - (case ($ftd-size ftd) - [(8) (list %Cretval %r1)] - [else (list %Cretval)])]))] + [(fp-ftd& ,ftd) + (let* ([members ($ftd->members ftd)] + [num-members (length members)]) + (cond + [(and (fx<= num-members 4) + (or (andmap double-member? members) + (andmap float-member? members))) + ;; double/float results are in floating-point registers + (let ([double? (and (pair? members) (double-member? (car members)))]) + (let loop ([members members] [sgl* (sgl-regs)]) + (cond + [(null? members) '()] + [double? + (cons (car sgl*) (loop (cdr members) (cddr sgl*)))] + [else + (cons (car sgl*) (if (null? (cdr members)) + '() + (loop (cddr members) (cddr sgl*))))])))] + [else + ;; result is in %Cretval and maybe %r1 + (case ($ftd-size ftd) + [(8) (list %Cretval %r1)] + [else (list %Cretval)])]))] [else (list %r0)]))] [add-deactivate (lambda (adjust-active? t0 live* result-live* k) (cond - [adjust-active? - (%seq - (set! ,%ac0 ,t0) - ,(save-and-restore live* (%inline deactivate-thread)) - ,(k %ac0) - ,(save-and-restore result-live* `(set! ,%Cretval ,(%inline activate-thread))))] - [else (k t0)]))]) + [adjust-active? + (%seq + (set! ,%ac0 ,t0) + ,(save-and-restore live* (%inline deactivate-thread)) + ,(k %ac0) + ,(save-and-restore result-live* `(set! ,%Cretval ,(%inline activate-thread))))] + [else (k t0)]))]) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore (let* ([arg-type* (info-foreign-arg-type* info)] [conv* (info-foreign-conv* info)] [varargs? (ormap (lambda (conv) (and (pair? conv) (eq? (car conv) 'varargs))) conv*)] - [result-type (info-foreign-result-type info)] + [result-type (info-foreign-result-type info)] [result-reg* (get-result-regs result-type varargs?)] - [fill-result-here? (indirect-result-that-fits-in-registers? result-type)] + [fill-result-here? (indirect-result-that-fits-in-registers? result-type)] [adjust-active? (if-feature pthreads (memq 'adjust-active conv*) #f)]) (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*) varargs?) (lambda (args-frame-size locs live*) (let* ([frame-size (align 8 (+ args-frame-size - (if fill-result-here? - 4 - 0)))] + (if fill-result-here? + 4 + 0)))] [adjust-frame (lambda (op) (lambda () (if (fx= frame-size 0) `(nop) `(set! ,%sp (inline ,null-info ,op ,%sp (immediate ,frame-size))))))]) (values - (adjust-frame %-) - (let ([locs (reverse locs)]) - (cond - [fill-result-here? - ;; stash extra argument on the stack to be retrieved after call and filled with the result: - (cons (load-int-stack args-frame-size) locs)] - [else locs])) - (lambda (t0 not-varargs?) - (add-fill-result fill-result-here? result-type args-frame-size - (add-deactivate adjust-active? t0 live* result-reg* - (lambda (t0) - `(inline ,(make-info-kill*-live* (add-caller-save-registers result-reg*) live*) ,%c-call ,t0))))) - (nanopass-case (Ltype Type) result-type - [(fp-double-float) - (if varargs? - (lambda (lvalue) ; unboxed - `(set! ,lvalue ,(%inline fpcastfrom ,%r1 ,%Cretval))) - (lambda (lvalue) ; unboxed - `(set! ,lvalue ,%Cfpretval)))] - [(fp-single-float) - (if varargs? - (lambda (lvalue) ; unboxed - (let ([t %Cfpretval]) ; should be ok as a temporary register - `(seq - (set! ,t ,(%inline fpcastfrom ,%r1 ,%Cretval)) ; we don't actually care about the hi/%r1 part - (set! ,lvalue ,(%inline single->double ,t))))) - (lambda (lvalue) ; unboxed - `(set! ,lvalue ,(%inline single->double ,%Cfpretval))))] - [(fp-integer ,bits) - (case bits - [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline sext8 ,%r0)))] - [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline sext16 ,%r0)))] - [(32) (lambda (lvalue) `(set! ,lvalue ,%r0))] - [(64) (lambda (lvlow lvhigh) - `(seq - (set! ,lvhigh ,%r1) - (set! ,lvlow ,%r0)))] - [else (sorry! who "unexpected asm-foreign-procedures fp-integer size ~s" bits)])] - [(fp-unsigned ,bits) - (case bits - [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline zext8 ,%r0)))] - [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline zext16 ,%r0)))] - [(32) (lambda (lvalue) `(set! ,lvalue ,%r0))] - [(64) (lambda (lvlow lvhigh) - `(seq - (set! ,lvhigh ,%r1) - (set! ,lvlow ,%r0)))] - [else (sorry! who "unexpected asm-foreign-procedures fp-unsigned size ~s" bits)])] - [else (lambda (lvalue) `(set! ,lvalue ,%r0))]) - (adjust-frame %+))) - ))))))) + (adjust-frame %-) + (let ([locs (reverse locs)]) + (cond + [fill-result-here? + ;; stash extra argument on the stack to be retrieved after call and filled with the result: + (cons (load-int-stack args-frame-size) locs)] + [else locs])) + (lambda (t0 not-varargs?) + (add-fill-result fill-result-here? result-type args-frame-size + (add-deactivate adjust-active? t0 live* result-reg* + (lambda (t0) + `(inline ,(make-info-kill*-live* (add-caller-save-registers result-reg*) live*) ,%c-call ,t0))))) + (nanopass-case (Ltype Type) result-type + [(fp-double-float) + (if varargs? + (lambda (lvalue) ; unboxed + `(set! ,lvalue ,(%inline fpcastfrom ,%r1 ,%Cretval))) + (lambda (lvalue) ; unboxed + `(set! ,lvalue ,%Cfpretval)))] + [(fp-single-float) + (if varargs? + (lambda (lvalue) ; unboxed + (let ([t %Cfpretval]) ; should be ok as a temporary register + `(seq + (set! ,t ,(%inline fpcastfrom ,%r1 ,%Cretval)) ; we don't actually care about the hi/%r1 part + (set! ,lvalue ,(%inline single->double ,t))))) + (lambda (lvalue) ; unboxed + `(set! ,lvalue ,(%inline single->double ,%Cfpretval))))] + [(fp-integer ,bits) + (case bits + [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline sext8 ,%r0)))] + [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline sext16 ,%r0)))] + [(32) (lambda (lvalue) `(set! ,lvalue ,%r0))] + [(64) (lambda (lvlow lvhigh) + `(seq + (set! ,lvhigh ,%r1) + (set! ,lvlow ,%r0)))] + [else (sorry! who "unexpected asm-foreign-procedures fp-integer size ~s" bits)])] + [(fp-unsigned ,bits) + (case bits + [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline zext8 ,%r0)))] + [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline zext16 ,%r0)))] + [(32) (lambda (lvalue) `(set! ,lvalue ,%r0))] + [(64) (lambda (lvlow lvhigh) + `(seq + (set! ,lvhigh ,%r1) + (set! ,lvlow ,%r0)))] + [else (sorry! who "unexpected asm-foreign-procedures fp-unsigned size ~s" bits)])] + [else (lambda (lvalue) `(set! ,lvalue ,%r0))]) + (adjust-frame %+)))))))))) (define-who asm-foreign-callable #| @@ -2935,15 +2938,15 @@ (lambda (offset) (lambda (lolvalue hilvalue) (%seq - (set! ,lolvalue ,(%mref ,%sp ,offset)) - (set! ,hilvalue ,(%mref ,%sp ,(fx+ offset 4))))))) - (define load-stack-address - (lambda (offset) - (lambda (lvalue) - `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) + (set! ,lolvalue ,(%mref ,%sp ,offset)) + (set! ,hilvalue ,(%mref ,%sp ,(fx+ offset 4))))))) + (define load-stack-address + (lambda (offset) + (lambda (lvalue) + `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) (define count-reg-args (lambda (types synthesize-first? varargs?) - ; bsgl? is #t iff we have a "b" single (second half of double) float reg to fill + ; bsgl? is #t iff we have a "b" single (second half of double) float reg to fill (let f ([types types] [iint (if synthesize-first? -1 0)] [idbl 0] [bsgl? #f]) (if (null? types) (values iint idbl) @@ -2956,48 +2959,48 @@ (f (cdr types) iint (fx+ idbl 1) bsgl?) (f (cdr types) iint idbl #f)))] [(fp-single-float) - (if varargs? + (if varargs? (f (cdr types) (if (fx< iint num-int-regs) (fx+ iint 1) iint) idbl bsgl?) - (if bsgl? + (if bsgl? (f (cdr types) iint idbl #f) (if (fx< idbl 8) - (f (cdr types) iint (fx+ idbl 1) #t) - (f (cdr types) iint idbl #f))))] - [(fp-ftd& ,ftd) - (let* ([size ($ftd-size ftd)] - [members ($ftd->members ftd)] - [num-members (length members)]) - (cond - [(and (fx<= num-members 4) - (not varargs?) - (andmap double-member? members)) - ;; doubles are either in registers or all on stack - (if (fx<= (fx+ idbl num-members) 8) - (f (cdr types) iint (fx+ idbl num-members) #f) - ;; no more floating-point registers should be used, but ok if we count more - (f (cdr types) iint idbl #f))] - [(and (fx<= num-members 4) - (not varargs?) - (andmap float-member? members)) - ;; floats are either in registers or all on stack - (let ([amt (fxsrl (align 2 (fx- num-members (if bsgl? 1 0))) 1)]) - (if (fx<= (fx+ idbl amt) 8) - (let ([odd-floats? (fxodd? num-members)]) - (if bsgl? - (f (cdr types) iint (+ idbl amt) (not odd-floats?)) - (f (cdr types) iint (+ idbl amt) odd-floats?))) - ;; no more floating-point registers should be used, but ok if we count more - (f (cdr types) iint idbl #f)))] - [(fx= 8 ($ftd-alignment ftd)) - (f (cdr types) (fxmin 4 (fx+ (align 2 iint) (fxsrl size 2))) idbl bsgl?)] - [else - (let ([size (align 4 size)]) - (f (cdr types) (fxmin 4 (fx+ iint (fxsrl size 2))) idbl bsgl?))]))] + (f (cdr types) iint (fx+ idbl 1) #t) + (f (cdr types) iint idbl #f))))] + [(fp-ftd& ,ftd) + (let* ([size ($ftd-size ftd)] + [members ($ftd->members ftd)] + [num-members (length members)]) + (cond + [(and (fx<= num-members 4) + (not varargs?) + (andmap double-member? members)) + ;; doubles are either in registers or all on stack + (if (fx<= (fx+ idbl num-members) 8) + (f (cdr types) iint (fx+ idbl num-members) #f) + ;; no more floating-point registers should be used, but ok if we count more + (f (cdr types) iint idbl #f))] + [(and (fx<= num-members 4) + (not varargs?) + (andmap float-member? members)) + ;; floats are either in registers or all on stack + (let ([amt (fxsrl (align 2 (fx- num-members (if bsgl? 1 0))) 1)]) + (if (fx<= (fx+ idbl amt) 8) + (let ([odd-floats? (fxodd? num-members)]) + (if bsgl? + (f (cdr types) iint (+ idbl amt) (not odd-floats?)) + (f (cdr types) iint (+ idbl amt) odd-floats?))) + ;; no more floating-point registers should be used, but ok if we count more + (f (cdr types) iint idbl #f)))] + [(fx= 8 ($ftd-alignment ftd)) + (f (cdr types) (fxmin 4 (fx+ (align 2 iint) (fxsrl size 2))) idbl bsgl?)] + [else + (let ([size (align 4 size)]) + (f (cdr types) (fxmin 4 (fx+ iint (fxsrl size 2))) idbl bsgl?))]))] [else (if (nanopass-case (Ltype Type) (car types) - [(fp-integer ,bits) (fx= bits 64)] - [(fp-unsigned ,bits) (fx= bits 64)] - [else #f]) + [(fp-integer ,bits) (fx= bits 64)] + [(fp-unsigned ,bits) (fx= bits 64)] + [else #f]) (let ([iint (align 2 iint)]) (f (cdr types) (if (fx< iint num-int-regs) (fx+ iint 2) iint) idbl bsgl?)) (f (cdr types) (if (fx< iint num-int-regs) (fx+ iint 1) iint) idbl bsgl?))]))))) @@ -3005,10 +3008,10 @@ ; all of the args are on the stack at this point, though not contiguous since ; we push all of the int reg args with one push instruction and all of the ; float reg args with another (v)push instruction; the saved int regs - ; continue on into the stack variables, which is convenient when a struct - ; argument is split across registers and the stack + ; continue on into the stack variables, which is convenient when a struct + ; argument is split across registers and the stack (lambda (types saved-reg-bytes pre-pad-bytes return-bytes float-reg-bytes post-pad-bytes int-reg-bytes - synthesize-first? varargs?) + synthesize-first? varargs?) (let* ([return-space-offset (fx+ saved-reg-bytes pre-pad-bytes)] [float-reg-offset (fx+ return-space-offset return-bytes)] [int-reg-offset (fx+ float-reg-offset float-reg-bytes post-pad-bytes)] @@ -3030,181 +3033,181 @@ (nanopass-case (Ltype Type) (car types) [(fp-double-float) (cond - [(and varargs? - ;; For varargs, treat a double like a 64-bit integer - (let ([iint (align 2 iint)]) - (and (fx< iint num-int-regs) - iint))) - => (lambda (new-iint) - (let ([int-reg-offset (if (fxeven? iint) int-reg-offset (fx+ int-reg-offset 4))] - [iint new-iint]) - (loop (cdr types) - (cons (load-double-stack int-reg-offset) locs) - (fx+ iint 2) idbl bsgl-offset (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)))] - [(and (not varargs?) - (< idbl num-dbl-regs)) - (loop (cdr types) - (cons (load-double-stack float-reg-offset) locs) - iint (fx+ idbl 1) bsgl-offset int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)] - [else - (let ([stack-arg-offset (align 8 stack-arg-offset)] - [iint (if varargs? (align 2 iint) iint)]) ; use up register if argument didn't fit - (loop (cdr types) - (cons (load-double-stack stack-arg-offset) locs) - iint num-dbl-regs #f int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))])] + [(and varargs? + ;; For varargs, treat a double like a 64-bit integer + (let ([iint (align 2 iint)]) + (and (fx< iint num-int-regs) + iint))) + => (lambda (new-iint) + (let ([int-reg-offset (if (fxeven? iint) int-reg-offset (fx+ int-reg-offset 4))] + [iint new-iint]) + (loop (cdr types) + (cons (load-double-stack int-reg-offset) locs) + (fx+ iint 2) idbl bsgl-offset (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)))] + [(and (not varargs?) + (< idbl num-dbl-regs)) + (loop (cdr types) + (cons (load-double-stack float-reg-offset) locs) + iint (fx+ idbl 1) bsgl-offset int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)] + [else + (let ([stack-arg-offset (align 8 stack-arg-offset)] + [iint (if varargs? (align 2 iint) iint)]) ; use up register if argument didn't fit + (loop (cdr types) + (cons (load-double-stack stack-arg-offset) locs) + iint num-dbl-regs #f int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))])] [(fp-single-float) (cond - [bsgl-offset + [bsgl-offset + (loop (cdr types) + (cons (load-single-stack bsgl-offset) locs) + iint idbl #f int-reg-offset float-reg-offset stack-arg-offset)] + [(and (< idbl num-dbl-regs) + (not varargs?)) (loop (cdr types) - (cons (load-single-stack bsgl-offset) locs) - iint idbl #f int-reg-offset float-reg-offset stack-arg-offset)] - [(and (< idbl num-dbl-regs) - (not varargs?)) + ; with big-endian ARM might need to adjust offset +/- 4 since pair of + ; single floats in a pushed double float might be reversed + (cons (load-single-stack float-reg-offset) locs) + iint (fx+ idbl 1) (fx+ float-reg-offset 4) int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)] + [(and varargs? + (fx< iint num-int-regs)) (loop (cdr types) - ; with big-endian ARM might need to adjust offset +/- 4 since pair of - ; single floats in a pushed double float might be reversed - (cons (load-single-stack float-reg-offset) locs) - iint (fx+ idbl 1) (fx+ float-reg-offset 4) int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)] - [(and varargs? - (fx< iint num-int-regs)) - (loop (cdr types) - (cons (load-single-stack int-reg-offset) locs) - (fx+ iint 1) idbl bsgl-offset (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset)] - [else + (cons (load-single-stack int-reg-offset) locs) + (fx+ iint 1) idbl bsgl-offset (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset)] + [else (loop (cdr types) - (cons (load-single-stack stack-arg-offset) locs) - iint num-dbl-regs #f int-reg-offset float-reg-offset (fx+ stack-arg-offset 4))])] - [(fp-ftd& ,ftd) - (let* ([size ($ftd-size ftd)] - [members ($ftd->members ftd)] - [num-members (length members)]) - (cond - [(and (not varargs?) + (cons (load-single-stack stack-arg-offset) locs) + iint num-dbl-regs #f int-reg-offset float-reg-offset (fx+ stack-arg-offset 4))])] + [(fp-ftd& ,ftd) + (let* ([size ($ftd-size ftd)] + [members ($ftd->members ftd)] + [num-members (length members)]) + (cond + [(and (not varargs?) (fx<= num-members 4) - (andmap double-member? members)) - ;; doubles are either in registers or all on stack - (if (fx<= (fx+ idbl num-members) num-dbl-regs) - (loop (cdr types) - (cons (load-stack-address float-reg-offset) locs) - iint (fx+ idbl num-members) #f int-reg-offset (fx+ float-reg-offset size) stack-arg-offset) - (let ([stack-arg-offset (align 8 stack-arg-offset)]) - (loop (cdr types) - (cons (load-stack-address stack-arg-offset) locs) - iint num-dbl-regs #f int-reg-offset #f (fx+ stack-arg-offset size))))] - [(and (not varargs?) + (andmap double-member? members)) + ;; doubles are either in registers or all on stack + (if (fx<= (fx+ idbl num-members) num-dbl-regs) + (loop (cdr types) + (cons (load-stack-address float-reg-offset) locs) + iint (fx+ idbl num-members) #f int-reg-offset (fx+ float-reg-offset size) stack-arg-offset) + (let ([stack-arg-offset (align 8 stack-arg-offset)]) + (loop (cdr types) + (cons (load-stack-address stack-arg-offset) locs) + iint num-dbl-regs #f int-reg-offset #f (fx+ stack-arg-offset size))))] + [(and (not varargs?) (fx<= num-members 4) - (andmap float-member? members)) - ;; floats are either in registers or all on stack - (let ([amt (fxsrl (align 2 (fx- num-members (if bsgl-offset 1 0))) 1)]) - (if (fx<= (fx+ idbl amt) num-dbl-regs) - (let ([odd-floats? (fxodd? num-members)]) - (if bsgl-offset - (let ([dbl-size (align 8 (fx- size 4))]) - (loop (cdr types) - (cons (load-stack-address bsgl-offset) locs) - iint (fx+ idbl amt) (if odd-floats? #f (+ bsgl-offset size)) int-reg-offset - (fx+ float-reg-offset dbl-size) stack-arg-offset)) - (let ([dbl-size (align 8 size)]) - (loop (cdr types) - (cons (load-stack-address float-reg-offset) locs) - iint (fx+ idbl amt) (and odd-floats? (fx+ float-reg-offset size)) int-reg-offset - (fx+ float-reg-offset dbl-size) stack-arg-offset)))) - (loop (cdr types) - (cons (load-stack-address stack-arg-offset) locs) - iint num-dbl-regs #f int-reg-offset float-reg-offset (fx+ stack-arg-offset size))))] - [(fx= 8 ($ftd-alignment ftd)) + (andmap float-member? members)) + ;; floats are either in registers or all on stack + (let ([amt (fxsrl (align 2 (fx- num-members (if bsgl-offset 1 0))) 1)]) + (if (fx<= (fx+ idbl amt) num-dbl-regs) + (let ([odd-floats? (fxodd? num-members)]) + (if bsgl-offset + (let ([dbl-size (align 8 (fx- size 4))]) + (loop (cdr types) + (cons (load-stack-address bsgl-offset) locs) + iint (fx+ idbl amt) (if odd-floats? #f (+ bsgl-offset size)) int-reg-offset + (fx+ float-reg-offset dbl-size) stack-arg-offset)) + (let ([dbl-size (align 8 size)]) + (loop (cdr types) + (cons (load-stack-address float-reg-offset) locs) + iint (fx+ idbl amt) (and odd-floats? (fx+ float-reg-offset size)) int-reg-offset + (fx+ float-reg-offset dbl-size) stack-arg-offset)))) + (loop (cdr types) + (cons (load-stack-address stack-arg-offset) locs) + iint num-dbl-regs #f int-reg-offset float-reg-offset (fx+ stack-arg-offset size))))] + [(fx= 8 ($ftd-alignment ftd)) (let ([int-reg-offset (if (fxeven? iint) int-reg-offset (fx+ int-reg-offset 4))] - [iint (align 2 iint)] - [amt (fxsrl size 2)]) + [iint (align 2 iint)] + [amt (fxsrl size 2)]) (if (fx< iint num-int-regs) ; argument starts in registers, may continue on stack (loop (cdr types) - (cons (load-stack-address int-reg-offset) locs) - (fxmin num-int-regs (fx+ iint amt)) idbl bsgl-offset (fx+ int-reg-offset size) float-reg-offset - (fx+ stack-arg-offset (fxmax 0 (fx* 4 (fx- (fx+ iint amt) 4))))) + (cons (load-stack-address int-reg-offset) locs) + (fxmin num-int-regs (fx+ iint amt)) idbl bsgl-offset (fx+ int-reg-offset size) float-reg-offset + (fx+ stack-arg-offset (fxmax 0 (fx* 4 (fx- (fx+ iint amt) 4))))) (let ([stack-arg-offset (align 8 stack-arg-offset)]) (loop (cdr types) - (cons (load-stack-address stack-arg-offset) locs) - iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset size)))))] - [else - (let* ([size (align 4 size)] - [amt (fxsrl size 2)]) - (if (fx< iint num-int-regs) ; argument starts in registers, may continue on stack - (loop (cdr types) - (cons (load-stack-address int-reg-offset) locs) - (fxmin num-int-regs (fx+ iint amt)) idbl bsgl-offset (fx+ int-reg-offset size) float-reg-offset - (fx+ stack-arg-offset (fxmax 0 (fx* 4 (fx- (fx+ iint amt) 4))))) - (loop (cdr types) - (cons (load-stack-address stack-arg-offset) locs) - iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset size))))]))] + (cons (load-stack-address stack-arg-offset) locs) + iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset size)))))] + [else + (let* ([size (align 4 size)] + [amt (fxsrl size 2)]) + (if (fx< iint num-int-regs) ; argument starts in registers, may continue on stack + (loop (cdr types) + (cons (load-stack-address int-reg-offset) locs) + (fxmin num-int-regs (fx+ iint amt)) idbl bsgl-offset (fx+ int-reg-offset size) float-reg-offset + (fx+ stack-arg-offset (fxmax 0 (fx* 4 (fx- (fx+ iint amt) 4))))) + (loop (cdr types) + (cons (load-stack-address stack-arg-offset) locs) + iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset size))))]))] [else (if (nanopass-case (Ltype Type) (car types) - [(fp-integer ,bits) (fx= bits 64)] - [(fp-unsigned ,bits) (fx= bits 64)] - [else #f]) + [(fp-integer ,bits) (fx= bits 64)] + [(fp-unsigned ,bits) (fx= bits 64)] + [else #f]) (let ([int-reg-offset (if (fxeven? iint) int-reg-offset (fx+ int-reg-offset 4))] - [iint (align 2 iint)]) + [iint (align 2 iint)]) (if (fx= iint num-int-regs) (let ([stack-arg-offset (align 8 stack-arg-offset)]) (loop (cdr types) - (cons (load-int64-stack stack-arg-offset) locs) - iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset 8))) + (cons (load-int64-stack stack-arg-offset) locs) + iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset 8))) (loop (cdr types) - (cons (load-int64-stack int-reg-offset) locs) - (fx+ iint 2) idbl bsgl-offset (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset))) + (cons (load-int64-stack int-reg-offset) locs) + (fx+ iint 2) idbl bsgl-offset (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset))) (if (fx= iint num-int-regs) (loop (cdr types) - (cons (load-int-stack (car types) stack-arg-offset) locs) - iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset 4)) + (cons (load-int-stack (car types) stack-arg-offset) locs) + iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset 4)) (loop (cdr types) - (cons (load-int-stack (car types) int-reg-offset) locs) - (fx+ iint 1) idbl bsgl-offset (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset)))])))))) + (cons (load-int-stack (car types) int-reg-offset) locs) + (fx+ iint 1) idbl bsgl-offset (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset)))])))))) (define do-result (lambda (result-type synthesize-first? varargs? return-stack-offset) - (nanopass-case (Ltype Type) result-type + (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) - (let* ([members ($ftd->members ftd)] - [num-members (length members)]) - (cond - [(and (not varargs?) + (let* ([members ($ftd->members ftd)] + [num-members (length members)]) + (cond + [(and (not varargs?) (fx<= 1 num-members 4) - (or (andmap double-member? members) - (andmap float-member? members))) - ;; double/float results returned in floating-point registers - (values - (lambda () - (let ([double? (and (pair? members) (double-member? (car members)))]) - (let loop ([members members] [sgl* (sgl-regs)] [offset return-stack-offset] [e #f]) - (cond - [(null? members) e] - [else - (loop (cdr members) - (if double? (cddr sgl*) (cdr sgl*)) - (fx+ offset (if double? 8 4)) - (let ([new-e + (or (andmap double-member? members) + (andmap float-member? members))) + ;; double/float results returned in floating-point registers + (values + (lambda () + (let ([double? (and (pair? members) (double-member? (car members)))]) + (let loop ([members members] [sgl* (sgl-regs)] [offset return-stack-offset] [e #f]) + (cond + [(null? members) e] + [else + (loop (cdr members) + (if double? (cddr sgl*) (cdr sgl*)) + (fx+ offset (if double? 8 4)) + (let ([new-e (if double? `(set! ,(car sgl*) ,(%mref ,%sp ,%zero ,offset fp)) `(set! ,(car sgl*) ,(%inline load-single ,(%mref ,%sp ,%zero ,offset fp))))]) - (if e `(seq ,e ,new-e) new-e)))])))) - (let ([double? (and (pair? members) (double-member? (car members)))]) - (let loop ([members members] [sgl* (sgl-regs)] [aligned? #t]) - (cond - [(null? members) '()] - [else (let ([regs (loop (cdr members) - (if double? (cddr sgl*) (cdr sgl*)) - (or double? (not aligned?)))]) - (if aligned? (cons (car sgl*) regs) regs))]))) - ($ftd-size ftd))] - [else - (case ($ftd-size ftd) - [(8) - (values (lambda () - `(seq - (set! ,%Cretval ,(%mref ,%sp ,return-stack-offset)) - (set! ,%r1 ,(%mref ,%sp ,(fx+ 4 return-stack-offset))))) - (list %Cretval %r1) - 8)] - [else - (values (lambda () + (if e `(seq ,e ,new-e) new-e)))])))) + (let ([double? (and (pair? members) (double-member? (car members)))]) + (let loop ([members members] [sgl* (sgl-regs)] [aligned? #t]) + (cond + [(null? members) '()] + [else (let ([regs (loop (cdr members) + (if double? (cddr sgl*) (cdr sgl*)) + (or double? (not aligned?)))]) + (if aligned? (cons (car sgl*) regs) regs))]))) + ($ftd-size ftd))] + [else + (case ($ftd-size ftd) + [(8) + (values (lambda () + `(seq + (set! ,%Cretval ,(%mref ,%sp ,return-stack-offset)) + (set! ,%r1 ,(%mref ,%sp ,(fx+ 4 return-stack-offset))))) + (list %Cretval %r1) + 8)] + [else + (values (lambda () (case ($ftd-size ftd) [(1) (let ([rep (if ($ftd-unsigned? ftd) 'unsigned-8 'integer-8)]) @@ -3213,25 +3216,25 @@ (let ([rep (if ($ftd-unsigned? ftd) 'unsigned-16 'integer-16)]) `(set! ,%Cretval (inline ,(make-info-load rep #f) ,%load ,%sp ,%zero (immediate ,return-stack-offset))))] [else `(set! ,%Cretval ,(%mref ,%sp ,return-stack-offset))])) - (list %Cretval) - 4)])]))] - [(fp-double-float) - (values (if varargs? + (list %Cretval) + 4)])]))] + [(fp-double-float) + (values (if varargs? (lambda (rhs) (let-values ([(endreg otherreg) (constant-case native-endianness - [(little) (values %Cretval %r1)] - [(big) (values %r1 %Cretval)])]) + [(little) (values %Cretval %r1)] + [(big) (values %r1 %Cretval)])]) `(seq (set! ,endreg ,(%mref ,rhs ,(constant flonum-data-disp))) (set! ,otherreg ,(%mref ,rhs ,(fx+ 4 (constant flonum-data-disp))))))) (lambda (rhs) `(set! ,%Cfpretval ,(%mref ,rhs ,%zero ,(constant flonum-data-disp) fp)))) - (if varargs? + (if varargs? (list %Cretval %r1) (list %Cfpretval)) - 0)] - [(fp-single-float) - (values (if varargs? + 0)] + [(fp-single-float) + (values (if varargs? (lambda (rhs) `(seq (set! ,%Cfpretval ,(%inline double->single ,(%mref ,rhs ,%zero ,(constant flonum-data-disp) fp))) @@ -3241,28 +3244,28 @@ (if varargs? (list %Cretval) (list %Cfpretval)) - 0)] + 0)] [(fp-void) (values (lambda () `(nop)) '() 0)] - [else - (cond - [(nanopass-case (Ltype Type) result-type + [else + (cond + [(nanopass-case (Ltype Type) result-type [(fp-integer ,bits) (fx= bits 64)] - [(fp-unsigned ,bits) (fx= bits 64)] - [else #f]) - (values (lambda (lo hi) - `(seq - (set! ,%Cretval ,lo) - (set! ,%r1 ,hi))) - (list %Cretval %r1) - 0)] - [else - (values (lambda (x) - `(set! ,%Cretval ,x)) - (list %Cretval) - 0)])]))) + [(fp-unsigned ,bits) (fx= bits 64)] + [else #f]) + (values (lambda (lo hi) + `(seq + (set! ,%Cretval ,lo) + (set! ,%r1 ,hi))) + (list %Cretval %r1) + 0)] + [else + (values (lambda (x) + `(set! ,%Cretval ,x)) + (list %Cretval) + 0)])]))) (lambda (info) (define callee-save-regs+lr (list %r4 %r5 %r6 %r7 %r8 %r9 %r10 %r11 %lr)) (define callee-save-fpregs (list %fp1 %fp2)) ; must be consecutive @@ -3277,73 +3280,74 @@ (let* ([arg-type* (info-foreign-arg-type* info)] [conv* (info-foreign-conv* info)] [varargs? (ormap (lambda (conv) (and (pair? conv) (eq? (car conv) 'varargs))) conv*)] - [result-type (info-foreign-result-type info)] + [result-type (info-foreign-result-type info)] [synthesize-first? (indirect-result-that-fits-in-registers? result-type)] [adjust-active? (if-feature pthreads (memq 'adjust-active conv*) #f)]) (let-values ([(iint idbl) (count-reg-args arg-type* synthesize-first? varargs?)]) (let ([saved-reg-bytes (fx+ (fx* isaved 4) (fx* fpsaved 8))] - [pre-pad-bytes (if (fxeven? isaved) + [pre-pad-bytes (if (fxeven? isaved) (if adjust-active? 8 0) 4)] [int-reg-bytes (fx* iint 4)] [post-pad-bytes (if (fxeven? iint) 0 4)] [float-reg-bytes (fx* idbl 8)]) - (let-values ([(get-result result-regs return-bytes) (do-result result-type synthesize-first? varargs? - (fx+ saved-reg-bytes pre-pad-bytes))]) + (let-values ([(get-result result-regs return-bytes) + (do-result result-type synthesize-first? varargs? + (fx+ saved-reg-bytes pre-pad-bytes))]) (let ([return-bytes (align 8 return-bytes)]) (values (lambda () - (%seq - ; save argument register values to the stack so we don't lose the values - ; across possible calls to C while setting up the tc and allocating memory - ,(if (fx= iint 0) `(nop) `(inline ,(make-info-kill*-live* '() (list-head (list %Carg1 %Carg2 %Carg3 %Carg4) iint)) ,%push-multiple)) - ; pad if necessary to force 8-byte boundary, and make room for indirect return: - ,(let ([len (+ post-pad-bytes return-bytes)]) - (if (fx= len 0) `(nop) `(set! ,%sp ,(%inline - ,%sp (immediate ,len))))) - ,(if (fx= idbl 0) `(nop) `(inline ,(make-info-vpush %Cfparg1 idbl) ,%vpush-multiple)) - ; pad if necessary to force 8-byte boundary after saving callee-save-regs+lr - ,(if (fx= pre-pad-bytes 0) `(nop) `(set! ,%sp ,(%inline - ,%sp (immediate ,pre-pad-bytes)))) - ; save the callee save registers & return address - (inline ,(make-info-kill*-live* '() callee-save-regs+lr) ,%push-multiple) - (inline ,(make-info-vpush (car callee-save-fpregs) fpsaved) ,%vpush-multiple) - ; maybe activate - ,(if adjust-active? - `(seq - (set! ,%Cretval ,(%inline activate-thread)) - (set! ,(%mref ,%sp ,saved-reg-bytes) ,%Cretval)) - `(nop)) - ; set up tc for benefit of argument-conversion code, which might allocate - ,(if-feature pthreads - (%seq - (set! ,%r0 ,(%inline get-tc)) - (set! ,%tc ,%r0)) - `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) - ; list of procedures that marshal arguments from their C stack locations - ; to the Scheme argument locations - (do-stack arg-type* saved-reg-bytes pre-pad-bytes return-bytes float-reg-bytes post-pad-bytes int-reg-bytes - synthesize-first? varargs?) - get-result - (lambda () - (in-context Tail - (%seq - ,(if adjust-active? - (%seq - ;; We need *(sp+saved-reg-bytes) in %Carg1, - ;; but that can also be a return register. - ;; Meanwhle, sp may change before we call unactivate. - ;; So, move to %r2 for now, then %Carg1 later: - (set! ,%r2 ,(%mref ,%sp ,saved-reg-bytes)) - ,(save-and-restore - result-regs - `(seq - (set! ,%Carg1 ,%r2) - ,(%inline unactivate-thread ,%Carg1)))) - `(nop)) - ; restore the callee save registers - (inline ,(make-info-vpush (car callee-save-fpregs) fpsaved) ,%vpop-multiple) - (inline ,(make-info-kill* callee-save-regs+lr) ,%pop-multiple) - ; deallocate space for pad & arg reg values - (set! ,%sp ,(%inline + ,%sp (immediate ,(fx+ pre-pad-bytes int-reg-bytes return-bytes post-pad-bytes float-reg-bytes)))) - ; done - (asm-c-return ,null-info ,callee-save-regs+lr ... ,result-regs ...))))))))))))))) + (%seq + ; save argument register values to the stack so we don't lose the values + ; across possible calls to C while setting up the tc and allocating memory + ,(if (fx= iint 0) `(nop) `(inline ,(make-info-kill*-live* '() (list-head (list %Carg1 %Carg2 %Carg3 %Carg4) iint)) ,%push-multiple)) + ; pad if necessary to force 8-byte boundary, and make room for indirect return: + ,(let ([len (+ post-pad-bytes return-bytes)]) + (if (fx= len 0) `(nop) `(set! ,%sp ,(%inline - ,%sp (immediate ,len))))) + ,(if (fx= idbl 0) `(nop) `(inline ,(make-info-vpush %Cfparg1 idbl) ,%vpush-multiple)) + ; pad if necessary to force 8-byte boundary after saving callee-save-regs+lr + ,(if (fx= pre-pad-bytes 0) `(nop) `(set! ,%sp ,(%inline - ,%sp (immediate ,pre-pad-bytes)))) + ; save the callee save registers & return address + (inline ,(make-info-kill*-live* '() callee-save-regs+lr) ,%push-multiple) + (inline ,(make-info-vpush (car callee-save-fpregs) fpsaved) ,%vpush-multiple) + ; maybe activate + ,(if adjust-active? + `(seq + (set! ,%Cretval ,(%inline activate-thread)) + (set! ,(%mref ,%sp ,saved-reg-bytes) ,%Cretval)) + `(nop)) + ; set up tc for benefit of argument-conversion code, which might allocate + ,(if-feature pthreads + (%seq + (set! ,%r0 ,(%inline get-tc)) + (set! ,%tc ,%r0)) + `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) + ; list of procedures that marshal arguments from their C stack locations + ; to the Scheme argument locations + (do-stack arg-type* saved-reg-bytes pre-pad-bytes return-bytes float-reg-bytes post-pad-bytes int-reg-bytes + synthesize-first? varargs?) + get-result + (lambda () + (in-context Tail + (%seq + ,(if adjust-active? + (%seq + ;; We need *(sp+saved-reg-bytes) in %Carg1, + ;; but that can also be a return register. + ;; Meanwhle, sp may change before we call unactivate. + ;; So, move to %r2 for now, then %Carg1 later: + (set! ,%r2 ,(%mref ,%sp ,saved-reg-bytes)) + ,(save-and-restore + result-regs + `(seq + (set! ,%Carg1 ,%r2) + ,(%inline unactivate-thread ,%Carg1)))) + `(nop)) + ; restore the callee save registers + (inline ,(make-info-vpush (car callee-save-fpregs) fpsaved) ,%vpop-multiple) + (inline ,(make-info-kill* callee-save-regs+lr) ,%pop-multiple) + ; deallocate space for pad & arg reg values + (set! ,%sp ,(%inline + ,%sp (immediate ,(fx+ pre-pad-bytes int-reg-bytes return-bytes post-pad-bytes float-reg-bytes)))) + ; done + (asm-c-return ,null-info ,callee-save-regs+lr ... ,result-regs ...))))))))))))))) ) diff --git a/s/arm64.ss b/s/arm64.ss index 104a4ef70..6810e41c4 100644 --- a/s/arm64.ss +++ b/s/arm64.ss @@ -324,7 +324,7 @@ (seq `(set! ,(make-live-info) ,u (immediate ,offset)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-add #f) ,x ,u))))]))))) - + (define-instruction value lea1 ;; NB: would be simpler if offset were explicit operand ;; NB: why not one version of lea with %zero for y in lea1 case? @@ -378,7 +378,7 @@ (seq `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-sub #f) ,y ,w)) (k x u imm-zero))))] - [else + [else (let ([u (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,u (immediate ,n)) @@ -460,7 +460,7 @@ (define-instruction value (fpcastto) [(op (x mem) (y fpur)) `(set! ,(make-live-info) ,(mem->mem x 'fp) ,y)] [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcastto ,y))]) - + (define-instruction value (fpcastfrom) [(op (x fpmem) (y ur)) `(set! ,(make-live-info) ,(mem->mem x 'uptr) ,y)] [(op (x fpur) (y ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcastfrom ,y))])) @@ -535,7 +535,7 @@ `(asm ,info ,asm-unactivate-thread ,x ,ulr)))]) (define-instruction value (asmlibcall) - [(op (z ur)) + [(op (z ur)) (if (info-asmlib-save-ra? info) `(set! ,(make-live-info) ,z (asm ,info ,(asm-library-call (info-asmlib-libspec info) #t) ,(info-kill*-live*-live* info) ...)) (let ([ulr (make-precolored-unspillable 'ulr %lr)]) @@ -620,7 +620,7 @@ (seq `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-add #f) ,x ,y)) (add-offset u))))))) - ;; NB: compiler implements init-lock! and unlock! as word store of zero + ;; NB: compiler implements init-lock! and unlock! as word store of zero (define-instruction pred (lock!) [(op (x ur) (y ur) (w imm-constant)) (let ([u (make-tmp 'u)] @@ -647,11 +647,11 @@ [(op (x ur) (y ur) (w imm-constant) (old ur) (new ur)) (lea->reg x y w (lambda (r) - (let ([u1 (make-tmp 'u1)] [u2 (make-tmp 'u2)]) + (let ([u1 (make-tmp 'u1)] [u2 (make-tmp 'u2)]) (seq `(set! ,(make-live-info) ,u1 (asm ,null-info ,asm-kill)) `(set! ,(make-live-info) ,u2 (asm ,null-info ,asm-kill)) - `(asm ,info ,asm-cas ,r ,old ,new ,u1 ,u2)))))])) + `(asm ,info ,asm-cas ,r ,old ,new ,u1 ,u2)))))])) (define-instruction effect (store-store-fence) [(op) @@ -793,7 +793,7 @@ (define-op add binary-op #b0) (define-op sub binary-op #b1) - + (define-op and logical-op #b00) (define-op orr logical-op #b01) (define-op eor logical-op #b10) @@ -801,7 +801,7 @@ (define-op cmp cmp-op #b1101011 #b00 0) (define-op tst cmp-op #b1101010 #b00 0) (define-op cmp/asr63 cmp-op #b1101011 #b10 63) - + (define-op cmpi cmp-imm-op #b1) ; selector is at bit 30 (op) (define-op cmni cmp-imm-op #b0) (define-op tsti logical-imm-op #b11 #f `(reg . ,%real-zero)) @@ -811,11 +811,11 @@ (define-op mvn mov-op #b1 #b1) (define-op lsli shifti-op #b10 'l) ; selector is at bit 29 (opc) - (define-op lsri shifti-op #b10 'r) + (define-op lsri shifti-op #b10 'r) (define-op asri shifti-op #b00 'r) (define-op lsl shift-op #b00) ; selector is at bit 10 (op2) - (define-op lsr shift-op #b01) + (define-op lsr shift-op #b01) (define-op asr shift-op #b10) (define-op sxtb extend-op #b100 #b1 #b000111) ; selectors are at bits 29 (sfc+opc), 22 (N), and 10 (imms) @@ -869,7 +869,7 @@ (define-op sturwi load-unscaled-imm-op #b10 #b0 #b00) (define-op sturfi load-unscaled-imm-op #b11 #b1 #b00) (define-op sturfsi load-unscaled-imm-op #b10 #b1 #b00) ; single-precision - + (define-op ldr load-op #b11 #b0 #b01) ; selectors are at bits 30 (size), 26, and 22 (opc) (define-op ldrw load-op #b10 #b0 #b01) (define-op ldrh load-op #b01 #b0 #b01) @@ -1280,15 +1280,15 @@ (define-who branch-label-op (lambda (op cond-bits dest code*) (define (emit-branch offset) - (safe-assert (branch-disp? (+ offset 4))) - (emit-code (op dest code*) + (safe-assert (branch-disp? (+ offset 4))) + (emit-code (op dest code*) [24 #b01010100] - [5 (fxand (fxsra (fx+ offset 4) 2) (fx- (fxsll 1 19) 1))] - [4 #b0] - [0 cond-bits])) + [5 (fxand (fxsra (fx+ offset 4) 2) (fx- (fxsll 1 19) 1))] + [4 #b0] + [0 cond-bits])) (record-case dest [(label) (offset l) (emit-branch offset)] - [(imm) (n) (emit-branch n)] ; generated for long branches + [(imm) (n) (emit-branch n)] ; generated for long branches [else (sorry! who "unexpected dest ~s" dest)]))) (define adr-op @@ -1566,17 +1566,17 @@ (define branch-disp? (lambda (x) - (and (fixnum? x) + (and (fixnum? x) (fx<= (- (expt 2 20)) x (- (expt 2 20) 1)) (not (fxlogtest x #b11))))) (define uncond-branch-disp? (lambda (x) (let ([x (+ x 4)]) ; because `branch-always-label-op` adds 4 - (and (fixnum? x) + (and (fixnum? x) (fx<= (- (expt 2 27)) x (- (expt 2 27) 1)) (not (fxlogtest x #b11)))))) - + (define asm-size (lambda (x) (case (car x) @@ -1593,7 +1593,7 @@ code*)))))) (define ax-movi - (lambda (dest n code*) + (lambda (dest n code*) (cond [(shifted16 n) => (lambda (imm+shift) @@ -1908,7 +1908,7 @@ (define asm-fpcastto (lambda (code* dest src) (Trivit (dest src) - (emit fmov.f->g dest src code*)))) + (emit fmov.f->g dest src code*)))) (define asm-fpcastfrom (lambda (code* dest src) @@ -2108,7 +2108,7 @@ (rec asm-c-simple-call-internal (lambda (code* . ignore) (asm-helper-call code* target save-ra?)))))) - + (define-who asm-indirect-call (lambda (code* dest lr . ignore) (safe-assert (eq? lr %lr)) @@ -2238,7 +2238,7 @@ (define build-bop-seq (lambda (bop opnd1 opnd2 l2 body) #`(let ([code* (emit #,bop #,opnd1 code*)]) - (safe-assert (= (asm-size* code*) #,b-asm-size)) + (safe-assert (= (asm-size* code*) #,b-asm-size)) (let-values ([(ignore #,opnd2) (get-disp-opnd (fx+ next-addr #,b-asm-size) #,l2)]) #,body)))) (define ops->code @@ -2268,8 +2268,8 @@ (handle-reverse #'c1 #'opnd2 #'l2))] [else (let ([code* #,(build-bop-seq #'b #'opnd1 #'opnd2 #'l2 - #'(emit b opnd2 code*))]) - #,(handle-reverse #'c2 #``(imm #,b-asm-size) #'step))])] + #'(emit b opnd2 code*))]) + #,(handle-reverse #'c2 #``(imm #,b-asm-size) #'step))])] [_ ($oops 'handle-inverse "expected an inverse in ~s" e)]))) (syntax-case x () [(_ [(pred ...) cl-body] ...) @@ -2346,16 +2346,16 @@ ;; When `n` fits in a fixnum, the compiler may generate ;; a bad shift that is under a guard, so force it to 63 bits (let ([n (fxand n 63)]) - (cond - [(fx= n 0) - ;; shift by 0 is just a move - (emit mov dest src0 code*)] - [else - (case op - [(sll) (emit lsli dest src0 n code*)] - [(srl) (emit lsri dest src0 n code*)] - [(sra) (emit asri dest src0 n code*)] - [else (sorry! 'shiftop "unrecognized ~s" op)])]))] + (cond + [(fx= n 0) + ;; shift by 0 is just a move + (emit mov dest src0 code*)] + [else + (case op + [(sll) (emit lsli dest src0 n code*)] + [(srl) (emit lsri dest src0 n code*)] + [(sra) (emit asri dest src0 n code*)] + [else (sorry! 'shiftop "unrecognized ~s" op)])]))] [else (case op [(sll) (emit lsl dest src0 src1 code*)] @@ -2377,7 +2377,7 @@ (emit fmov.f->g dest tmp code*))))))) (define asm-enter values) - + (define-who asm-inc-cc-counter (lambda (code* addr val tmp) (Trivit (addr val tmp) @@ -2396,26 +2396,26 @@ (lambda (code*) (emit addi #f tmp tmp 1 code*)) code*)))))) - + (module (asm-foreign-call asm-foreign-callable) (define align (lambda (b x) (let ([k (- b 1)]) (fxlogand (fx+ x k) (fxlognot k))))) (define (double-member? m) (and (eq? (car m) 'float) - (fx= (cadr m) 8))) + (fx= (cadr m) 8))) (define (float-member? m) (and (eq? (car m) 'float) - (fx= (cadr m) 4))) + (fx= (cadr m) 4))) (define (indirect-result-that-fits-in-registers? result-type) (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) - (let* ([members ($ftd->members ftd)] - [num-members (length members)]) - (or (fx<= ($ftd-size ftd) 4) - (and (fx= num-members 1) - ;; a struct containing only int64 is not returned in a register - (or (not ($ftd-compound? ftd)))) - (and (fx<= num-members 4) - (or (andmap double-member? members) - (andmap float-member? members)))))] - [else #f])) + (let* ([members ($ftd->members ftd)] + [num-members (length members)]) + (or (fx<= ($ftd-size ftd) 4) + (and (fx= num-members 1) + ;; a struct containing only int64 is not returned in a register + (or (not ($ftd-compound? ftd)))) + (and (fx<= num-members 4) + (or (andmap double-member? members) + (andmap float-member? members)))))] + [else #f])) (define int-argument-regs (list %Carg1 %Carg2 %Carg3 %Carg4 %Carg5 %Carg6 %Carg7 %Carg8)) (define fp-argument-regs (list %Cfparg1 %Cfparg2 %Cfparg3 %Cfparg4 @@ -2423,27 +2423,27 @@ (define save-and-restore (lambda (regs e) (safe-assert (andmap reg? regs)) - (with-output-language (L13 Effect) + (with-output-language (L13 Effect) (let ([save-and-restore-gp - (lambda (regs e) + (lambda (regs e) (let* ([regs (filter (lambda (r) (not (eq? (reg-type r) 'fp))) regs)]) (cond [(null? regs) e] [else (%seq - (inline ,(make-info-kill*-live* '() regs) ,%push-multiple) - ,e - (inline ,(make-info-kill*-live* regs '()) ,%pop-multiple))])))] - [save-and-restore-fp - (lambda (regs e) + (inline ,(make-info-kill*-live* '() regs) ,%push-multiple) + ,e + (inline ,(make-info-kill*-live* regs '()) ,%pop-multiple))])))] + [save-and-restore-fp + (lambda (regs e) (let ([fp-regs (filter (lambda (r) (eq? (reg-type r) 'fp)) regs)]) (cond [(null? fp-regs) e] [else (%seq - (inline ,(make-info-kill*-live* '() fp-regs) ,%push-fpmultiple) - ,e - (inline ,(make-info-kill*-live* fp-regs '()) ,%pop-fpmultiple))])))]) + (inline ,(make-info-kill*-live* '() fp-regs) ,%push-fpmultiple) + ,e + (inline ,(make-info-kill*-live* fp-regs '()) ,%pop-fpmultiple))])))]) (save-and-restore-gp regs (save-and-restore-fp regs e)))))) (define-record-type cat @@ -2795,29 +2795,29 @@ [(fp-double-float) (cond [(eq? 'fp (cat-place cat)) - (loop types cats + (loop types cats (cons (load-double-reg (car (cat-regs cat))) locs) isp ind-sp)] [(eq? 'int (cat-place cat)) - (loop types cats + (loop types cats (cons (load-double-into-int-reg (car (cat-regs cat))) locs) isp ind-sp)] [else - (loop types cats + (loop types cats (cons (load-double-stack isp) locs) (fx+ isp (cat-size cat) (cat-pad cat)) ind-sp)])] [(fp-single-float) (cond [(eq? 'fp (cat-place cat)) - (loop types cats + (loop types cats (cons (load-single-reg (car (cat-regs cat))) locs) isp ind-sp)] [(eq? 'int (cat-place cat)) - (loop types cats + (loop types cats (cons (load-single-into-int-reg (car (cat-regs cat))) locs) isp ind-sp)] [else - (loop types cats + (loop types cats (cons (load-single-stack isp) locs) (fx+ isp (cat-size cat) (cat-pad cat)) ind-sp)])] [(fp-ftd& ,ftd) @@ -2899,9 +2899,9 @@ (loop types cats (cons (load-int-stack isp (cat-size cat)) locs) (fx+ isp (cat-size cat) (cat-pad cat)) ind-sp)])])))))] - [add-fill-result + [add-fill-result ;; may destroy the values in result registers - (lambda (result-cat result-type args-frame-size fill-result-here? e) + (lambda (result-cat result-type args-frame-size fill-result-here? e) (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) (let* ([size ($ftd-size ftd)] @@ -2912,22 +2912,22 @@ ;; result is in integer registers (let loop ([int* (cat-regs result-cat)] [offset 0] [size size]) (cond - [(null? int*) `(seq ,e (set! ,tmp ,(%mref ,%sp ,args-frame-size)))] - [else - (%seq ,(loop (cdr int*) (fx+ offset 8) (fx- size 8)) - ,(reg-to-memory tmp offset (fxmin size 8) (car int*)))]))] + [(null? int*) `(seq ,e (set! ,tmp ,(%mref ,%sp ,args-frame-size)))] + [else + (%seq ,(loop (cdr int*) (fx+ offset 8) (fx- size 8)) + ,(reg-to-memory tmp offset (fxmin size 8) (car int*)))]))] [(fp) ;; result is in fp registers, so going to either double or float elements (let* ([double? (double-member? (car ($ftd->members ftd)))]) (let loop ([fp* (cat-regs result-cat)] [offset 0]) (cond - [(null? fp*) `(seq ,e (set! ,tmp ,(%mref ,%sp ,args-frame-size)))] - [double? - (%seq ,(loop (cdr fp*) (fx+ offset 8)) - (set! ,(%mref ,tmp ,%zero ,offset fp) ,(car fp*)))] - [else - (%seq ,(loop (cdr fp*) (fx+ offset 4)) - ,(%inline store-single ,(%mref ,tmp ,%zero ,offset fp) ,(car fp*)))])))] + [(null? fp*) `(seq ,e (set! ,tmp ,(%mref ,%sp ,args-frame-size)))] + [double? + (%seq ,(loop (cdr fp*) (fx+ offset 8)) + (set! ,(%mref ,tmp ,%zero ,offset fp) ,(car fp*)))] + [else + (%seq ,(loop (cdr fp*) (fx+ offset 4)) + ,(%inline store-single ,(%mref ,tmp ,%zero ,offset fp) ,(car fp*)))])))] [else ;; we passed the pointer to be filled, so nothing more to do here e]))] @@ -2937,13 +2937,13 @@ [add-deactivate (lambda (adjust-active? t0 live* result-live* k) (cond - [adjust-active? - (%seq - (set! ,%ac0 ,t0) - ,(save-and-restore live* (%inline deactivate-thread)) - ,(k %ac0) - ,(save-and-restore result-live* `(set! ,%Cretval ,(%inline activate-thread))))] - [else (k t0)]))]) + [adjust-active? + (%seq + (set! ,%ac0 ,t0) + ,(save-and-restore live* (%inline deactivate-thread)) + ,(k %ac0) + ,(save-and-restore result-live* `(set! ,%Cretval ,(%inline activate-thread))))] + [else (k t0)]))]) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore (let* ([arg-type* (info-foreign-arg-type* info)] @@ -2956,9 +2956,9 @@ arg-type*)] [conv* (info-foreign-conv* info)] [arg-cat* (categorize-arguments arg-type* (extract-varargs-after-conv conv*))] - [result-cat (car (categorize-arguments (list result-type) #f))] + [result-cat (car (categorize-arguments (list result-type) #f))] [result-reg* (cat-regs result-cat)] - [fill-result-here? (and ftd-result? + [fill-result-here? (and ftd-result? (not (cat-indirect-bytes result-cat)) (not (eq? 'stack (cat-place result-cat))))] [arg-stack-bytes (align 16 (compute-stack-argument-space arg-cat*))] @@ -2983,13 +2983,13 @@ (adjust-frame %-) (let ([locs (reverse locs)]) (cond - [fill-result-here? - ;; stash extra argument on the stack to be retrieved after call and filled with the result: - (cons (load-int-stack (fx+ arg-stack-bytes indirect-stack-bytes) 8) locs)] - [ftd-result? - ;; callee expects pointer to fill for return in %r8: - (cons (lambda (rhs) `(set! ,%r8 ,rhs)) locs)] - [else locs])) + [fill-result-here? + ;; stash extra argument on the stack to be retrieved after call and filled with the result: + (cons (load-int-stack (fx+ arg-stack-bytes indirect-stack-bytes) 8) locs)] + [ftd-result? + ;; callee expects pointer to fill for return in %r8: + (cons (lambda (rhs) `(set! ,%r8 ,rhs)) locs)] + [else locs])) (lambda (t0 not-varargs?) (add-fill-result result-cat result-type (fx+ arg-stack-bytes indirect-stack-bytes) fill-result-here? (add-deactivate adjust-active? t0 live* result-reg* @@ -3028,24 +3028,24 @@ | incoming stack args | | | +---------------------------+<- 16-byte boundary - | saved int reg args | + | saved int reg args | | + %r8 for indirect | - | + maybe padding | + | + maybe padding | +---------------------------+<- 16-byte boundary - | | + | | | saved float reg args | - | + maybe padding | + | + maybe padding | +---------------------------+<- 16-byte boundary - | | - | activatation state | + | | + | activatation state | | if necessary | +---------------------------+<- 16-byte boundary - | | + | | | &-return space | | if necessary | +---------------------------+<- 16-byte boundary | | - | callee-save regs + lr | + | callee-save regs + lr | | callee-save fpregs | +---------------------------+<- 16-byte boundary |# @@ -3084,10 +3084,10 @@ [(64) `(set! ,lvalue ,(%mref ,%sp ,offset))] [else (sorry! who "unexpected load-int-stack fp-unsigned size ~s" bits)])] [else `(set! ,lvalue ,(%mref ,%sp ,offset))])))) - (define load-stack-address - (lambda (offset) - (lambda (lvalue) - `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) + (define load-stack-address + (lambda (offset) + (lambda (lvalue) + `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) (define do-args ;; all of the args are on the stack at this point, though not contiguous since ;; we push all of the int reg args with one set of push instructions and all of the @@ -3100,181 +3100,180 @@ [int-reg-offset (if indirect-result? (fx+ init-int-reg-offset 8) init-int-reg-offset)] [float-reg-offset float-reg-offset] [stack-arg-offset stack-arg-offset]) - (if (null? types) - (let ([locs (reverse locs)]) - (cond - [synthesize-first? - (cons (load-stack-address return-offset) locs)] - [indirect-result? - (cons (load-word-stack init-int-reg-offset) locs)] - [else locs])) - (let ([cat (car cats)] - [type (car types)] - [cats (cdr cats)] - [types (cdr types)]) - (nanopass-case (Ltype Type) type - [(fp-double-float) - (case (cat-place cat) - [(fp) - (loop types cats - (cons (load-double-stack float-reg-offset) locs) - int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)] - [(int) - (loop types cats - (cons (load-double-stack int-reg-offset) locs) - (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)] - [else - (loop types cats - (cons (load-double-stack stack-arg-offset) locs) - int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))])] - [(fp-single-float) - (case (cat-place cat) - [(fp) - (loop types cats - (cons (load-single-stack float-reg-offset) locs) - int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)] - [(int) - (loop types cats - (cons (load-single-stack int-reg-offset) locs) - (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)] - [else - (loop types cats - (cons (load-single-stack stack-arg-offset) locs) - int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))])] - - [(fp-ftd& ,ftd) - (case (cat-place cat) - [(int) - (let ([indirect-bytes (cat-indirect-bytes cat)]) - (cond - [indirect-bytes - ;; pointer to an indirect argument - (safe-assert (fx= (length (cat-regs cat)) 1)) - (loop types cats - (cons (load-word-stack int-reg-offset) locs) - (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)] - [else - ;; point to argument on stack - (loop types cats - (cons (load-stack-address int-reg-offset) locs) - (fx+ int-reg-offset (cat-size cat) (cat-pad cat)) float-reg-offset stack-arg-offset)]))] - [(fp) - ;; point to argument, but if they're floats, then we need to - ;; shift double-sized registers into float-sized elements - (loop types cats - (cons (let ([proc (load-stack-address float-reg-offset)] - [members ($ftd->members ftd)]) - (cond - [(or (null? (cdr members)) - (double-member? (car members))) - proc] - [else - ;; instead of compacting here, it might be nicer to - ;; save registers in packed form in the first place - ;; (which means that `(cat-size cat)` would be a multiple of 4) - (lambda (lvalue) - (let loop ([members (cdr members)] - [dest-offset (fx+ float-reg-offset 4)] - [src-offset (fx+ float-reg-offset 8)]) - (if (null? members) - (proc lvalue) - (let ([tmp %argtmp]) - (%seq - (set! ,tmp (inline ,(make-info-load 'unsigned-32 #f) ,%load ,%sp ,%zero (immediate ,src-offset))) - (inline ,(make-info-load 'unsigned-32 #f) ,%store ,%sp ,%zero (immediate ,dest-offset) ,%argtmp) - ,(loop (cdr members) (fx+ dest-offset 4) (fx+ src-offset 8)))))))])) - locs) - int-reg-offset (fx+ float-reg-offset (cat-size cat) (cat-pad cat)) stack-arg-offset)] - [else - (let ([indirect-bytes (cat-indirect-bytes cat)]) - (cond - [indirect-bytes - ;; pointer (passed on stack) to an indirect argument (also on stack) - (safe-assert (fx= (cat-size cat) 8)) - (loop types cats - (cons (load-word-stack stack-arg-offset) locs) - int-reg-offset float-reg-offset (fx+ stack-arg-offset 8))] - [else - ;; point to argument on stack - (loop types cats - (cons (load-stack-address stack-arg-offset) locs) - int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))]))])] - [else - ;; integer, scheme-object, etc. - (case (cat-place cat) - [(int) - (loop types cats - (cons (load-int-stack type int-reg-offset) locs) - (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)] - [else - (loop types cats - (cons (load-int-stack type stack-arg-offset) locs) - int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))])])))))) + (if (null? types) + (let ([locs (reverse locs)]) + (cond + [synthesize-first? + (cons (load-stack-address return-offset) locs)] + [indirect-result? + (cons (load-word-stack init-int-reg-offset) locs)] + [else locs])) + (let ([cat (car cats)] + [type (car types)] + [cats (cdr cats)] + [types (cdr types)]) + (nanopass-case (Ltype Type) type + [(fp-double-float) + (case (cat-place cat) + [(fp) + (loop types cats + (cons (load-double-stack float-reg-offset) locs) + int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)] + [(int) + (loop types cats + (cons (load-double-stack int-reg-offset) locs) + (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)] + [else + (loop types cats + (cons (load-double-stack stack-arg-offset) locs) + int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))])] + [(fp-single-float) + (case (cat-place cat) + [(fp) + (loop types cats + (cons (load-single-stack float-reg-offset) locs) + int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)] + [(int) + (loop types cats + (cons (load-single-stack int-reg-offset) locs) + (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)] + [else + (loop types cats + (cons (load-single-stack stack-arg-offset) locs) + int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))])] + [(fp-ftd& ,ftd) + (case (cat-place cat) + [(int) + (let ([indirect-bytes (cat-indirect-bytes cat)]) + (cond + [indirect-bytes + ;; pointer to an indirect argument + (safe-assert (fx= (length (cat-regs cat)) 1)) + (loop types cats + (cons (load-word-stack int-reg-offset) locs) + (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)] + [else + ;; point to argument on stack + (loop types cats + (cons (load-stack-address int-reg-offset) locs) + (fx+ int-reg-offset (cat-size cat) (cat-pad cat)) float-reg-offset stack-arg-offset)]))] + [(fp) + ;; point to argument, but if they're floats, then we need to + ;; shift double-sized registers into float-sized elements + (loop types cats + (cons (let ([proc (load-stack-address float-reg-offset)] + [members ($ftd->members ftd)]) + (cond + [(or (null? (cdr members)) + (double-member? (car members))) + proc] + [else + ;; instead of compacting here, it might be nicer to + ;; save registers in packed form in the first place + ;; (which means that `(cat-size cat)` would be a multiple of 4) + (lambda (lvalue) + (let loop ([members (cdr members)] + [dest-offset (fx+ float-reg-offset 4)] + [src-offset (fx+ float-reg-offset 8)]) + (if (null? members) + (proc lvalue) + (let ([tmp %argtmp]) + (%seq + (set! ,tmp (inline ,(make-info-load 'unsigned-32 #f) ,%load ,%sp ,%zero (immediate ,src-offset))) + (inline ,(make-info-load 'unsigned-32 #f) ,%store ,%sp ,%zero (immediate ,dest-offset) ,%argtmp) + ,(loop (cdr members) (fx+ dest-offset 4) (fx+ src-offset 8)))))))])) + locs) + int-reg-offset (fx+ float-reg-offset (cat-size cat) (cat-pad cat)) stack-arg-offset)] + [else + (let ([indirect-bytes (cat-indirect-bytes cat)]) + (cond + [indirect-bytes + ;; pointer (passed on stack) to an indirect argument (also on stack) + (safe-assert (fx= (cat-size cat) 8)) + (loop types cats + (cons (load-word-stack stack-arg-offset) locs) + int-reg-offset float-reg-offset (fx+ stack-arg-offset 8))] + [else + ;; point to argument on stack + (loop types cats + (cons (load-stack-address stack-arg-offset) locs) + int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))]))])] + [else + ;; integer, scheme-object, etc. + (case (cat-place cat) + [(int) + (loop types cats + (cons (load-int-stack type int-reg-offset) locs) + (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)] + [else + (loop types cats + (cons (load-int-stack type stack-arg-offset) locs) + int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))])])))))) (define do-result (lambda (result-type result-cat synthesize-first? return-stack-offset) - (nanopass-case (Ltype Type) result-type + (nanopass-case (Ltype Type) result-type [(fp-double-float) - (lambda (rhs) + (lambda (rhs) `(set! ,%Cfpretval ,(%mref ,rhs ,%zero ,(constant flonum-data-disp) fp)))] - [(fp-single-float) + [(fp-single-float) (lambda (rhs) `(set! ,%Cfpretval ,(%inline double->single ,(%mref ,rhs ,%zero ,(constant flonum-data-disp) fp))))] [(fp-void) (lambda () `(nop))] [(fp-ftd& ,ftd) (cond - [(cat-indirect-bytes result-cat) - ;; we passed the pointer to be filled, so nothing more to do here - (lambda () `(nop))] - [else - (case (cat-place result-cat) - [(int) - (let ([to-regs - (lambda (x offset) - (let loop ([int* (cat-regs result-cat)] [offset offset] [size ($ftd-size ftd)]) - (cond - [(null? int*) `(nop)] + [(cat-indirect-bytes result-cat) + ;; we passed the pointer to be filled, so nothing more to do here + (lambda () `(nop))] + [else + (case (cat-place result-cat) + [(int) + (let ([to-regs + (lambda (x offset) + (let loop ([int* (cat-regs result-cat)] [offset offset] [size ($ftd-size ftd)]) + (cond + [(null? int*) `(nop)] + [else + (safe-assert (not (eq? (car int*) x))) + (%seq + ,(loop (cdr int*) (fx+ offset 8) (fx- size 8)) + ,(memory-to-reg (car int*) x offset (fxmin size 8) ($ftd-unsigned? ftd) %argtmp))])))]) + (if synthesize-first? + (lambda () + (to-regs %sp return-stack-offset)) + (lambda (x) + (to-regs x 0))))] + [(fp) + (let* ([double? (double-member? (car ($ftd->members ftd)))]) + (let ([to-regs + (lambda (x offset) + (let loop ([fp* (cat-regs result-cat)] [offset offset]) + (cond + [(null? fp*) `(nop)] + [double? + (%seq ,(loop (cdr fp*) (fx+ offset 8)) + (set! ,(car fp*) ,(%mref ,x ,%zero ,offset fp)))] [else - (safe-assert (not (eq? (car int*) x))) - (%seq - ,(loop (cdr int*) (fx+ offset 8) (fx- size 8)) - ,(memory-to-reg (car int*) x offset (fxmin size 8) ($ftd-unsigned? ftd) %argtmp))])))]) - (if synthesize-first? - (lambda () - (to-regs %sp return-stack-offset)) - (lambda (x) - (to-regs x 0))))] - [(fp) - (let* ([double? (double-member? (car ($ftd->members ftd)))]) - (let ([to-regs - (lambda (x offset) - (let loop ([fp* (cat-regs result-cat)] [offset offset]) - (cond - [(null? fp*) `(nop)] - [double? - (%seq ,(loop (cdr fp*) (fx+ offset 8)) - (set! ,(car fp*) ,(%mref ,x ,%zero ,offset fp)))] - [else - (%seq ,(loop (cdr fp*) (fx+ offset 4)) - (set! ,(car fp*) ,(%inline load-single ,(%mref ,x ,%zero ,offset fp))))])))]) - (if synthesize-first? - (lambda () - (to-regs %sp return-stack-offset)) - (lambda (x) - (to-regs x 0)))))] - [else - ;; we passed the pointer to be filled, so nothing more to do here - (lambda () `(nop))])])] + (%seq ,(loop (cdr fp*) (fx+ offset 4)) + (set! ,(car fp*) ,(%inline load-single ,(%mref ,x ,%zero ,offset fp))))])))]) + (if synthesize-first? + (lambda () + (to-regs %sp return-stack-offset)) + (lambda (x) + (to-regs x 0)))))] + [else + ;; we passed the pointer to be filled, so nothing more to do here + (lambda () `(nop))])])] [else ;; integer, scheme-object, etc. (lambda (x) `(set! ,%Cretval ,x))]))) (lambda (info) (define callee-save-regs+lr (cons* %lr - ;; reserved: - %tc %sfp %ap %trap - ;; allocable: - (get-allocable-callee-save-regs 'uptr))) + ;; reserved: + %tc %sfp %ap %trap + ;; allocable: + (get-allocable-callee-save-regs 'uptr))) (define callee-save-fpregs (get-allocable-callee-save-regs 'fp)) (define isaved (length callee-save-regs+lr)) (define fpsaved (length callee-save-fpregs)) @@ -3286,7 +3285,7 @@ [arg-type* (if ftd-result? (cdr arg-type*) arg-type*)] - [conv* (info-foreign-conv* info)] + [conv* (info-foreign-conv* info)] [arg-cat* (categorize-arguments arg-type* (extract-varargs-after-conv conv*))] [result-cat (car (categorize-arguments (list result-type) #f))] [synthesize-first? (and ftd-result? @@ -3333,7 +3332,7 @@ `(nop)) ;; set up tc for benefit of argument-conversion code, which might allocate ,(if-feature pthreads - (%seq + (%seq (set! ,%Cretval ,(%inline get-tc)) (set! ,%tc ,%Cretval)) `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) diff --git a/s/cpprim.ss b/s/cpprim.ss index da6669c7f..92c667c07 100644 --- a/s/cpprim.ss +++ b/s/cpprim.ss @@ -38,14 +38,14 @@ (define rtd-ancestry (csv7:record-field-accessor #!base-rtd 'ancestry)) -;; After the `np-expand-primitives` pass, some expression produce +;; After the `np-expand-primitives` pass, some expressions produce ;; double (i.e., floating-point) values instead of pointer values. ;; Those expression results always flow to an `inline` primitive ;; that expects double values. The main consequence is that a later ;; pass must only put such returns in a temporary with type 'fp. ; TODO: recognize a direct call when it is at the end of a sequence, closures, or let form -; TODO: push call into if? (would need to pull arguments into temporaries to ensure order of evaluation +; TODO: push call into if? (would need to pull arguments into temporaries to ensure order of evaluation) ; TODO: how does this interact with mvcall? (module (np-expand-primitives) (define-threaded new-l*) @@ -99,7 +99,7 @@ (single-valued? e)) e (with-output-language (L7 Expr) - (let ([t (make-tmp 'v)]) + (let ([t (make-tmp 'v)]) `(values ,(make-info-call #f #f #f #f #f) ,e))))] [(e) (ensure-single-valued e (fx= (optimize-level) 3))])) (define-pass np-expand-primitives : L7 (ir) -> L9 () diff --git a/s/ppc32.ss b/s/ppc32.ss index f4ad89610..8ff255474 100644 --- a/s/ppc32.ss +++ b/s/ppc32.ss @@ -696,10 +696,10 @@ [(op (x ur) (y ur) (w imm-constant) (old ur) (new ur)) (lea->reg x y w (lambda (base index) - (let ([u (make-tmp 'u)]) + (let ([u (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,u (asm ,null-info ,asm-kill)) - `(asm ,info ,asm-cas ,base ,index ,old ,new ,u)))))])) + `(asm ,info ,asm-cas ,base ,index ,old ,new ,u)))))])) (define-instruction effect (pause) [(op) `(asm ,info ,asm-isync)]) @@ -2075,19 +2075,19 @@ (emit nop (emit nop (emit nop - (emit nop + (emit nop (asm-helper-relocation reloc code*)))))))))) (define asm-helper-relocation (lambda (reloc code*) (cons* reloc (aop-cons* `(asm "relocation:" ,reloc) code*)))) - (define asm-return - (lambda () + (define asm-return + (lambda () (emit blr '()))) - (define asm-c-return - (lambda (info) + (define asm-c-return + (lambda (info) (emit blr '()))) (define asm-lognot @@ -2102,7 +2102,7 @@ (Trivit (bit) (let ([b (ax-imm-data bit)]) (emit creqv b b b code*))))) - + (define-who asm-inc-cc-counter (lambda (code* addr val tmp) (assert (not (eq? tmp %zero))) @@ -2126,14 +2126,14 @@ (Trivit (src base idx/off) (record-case idx/off [(imm) (n) (emit stwu src base `(imm ,n) code*)] - [else (emit stwux src base idx/off code*)])))) + [else (emit stwux src base idx/off code*)])))) (define asm-get-lr (lambda () (lambda (code* dest) (Trivit (dest) (emit mflr dest code*))))) - + (define asm-set-lr (lambda () (lambda (code* src) @@ -2143,7 +2143,7 @@ (define asm-isync (lambda (code*) (emit isync code*))) - + (module (asm-foreign-call asm-foreign-callable) (define align (lambda (b x) (let ([k (- b 1)]) (fxlogand (fx+ x k) (fxlognot k))))) (define gp-parameter-regs (lambda () (list %Carg1 %Carg2 %Carg3 %Carg4 %Carg5 %Carg6 %Carg7 %Carg8))) @@ -2158,7 +2158,7 @@ (define (indirect-result-that-fits-in-registers? result-type) (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) (not ($ftd-compound? ftd))] - [else #f])) + [else #f])) (define (indirect-result-to-pointer result-type arg-type*) (constant-case machine-type-name [(ppc32osx tppc32osx) @@ -2174,24 +2174,24 @@ (module (push-registers pop-registers) ;; stack offset must be 8-byte aligned if fp-reg-count is non-zero (define (move-registers regs fp-reg-count fp-regs load? offset e) - (with-output-language (L13 Effect) + (with-output-language (L13 Effect) (cond - [(fx> fp-reg-count 0) - ;; Push floating-point first to get correct alignment - (let ([offset (align 8 offset)]) - (move-registers regs (fx- fp-reg-count 1) (cdr fp-regs) load? (fx+ offset 8) - (cond - [load? `(seq ,e (set! ,(car fp-regs) ,(%mref ,%sp ,%zero ,offset fp)))] - [else `(seq (set! ,(%mref ,%sp ,%zero ,offset fp) ,(car fp-regs)) ,e)])))] - [(pair? regs) - (move-registers (cdr regs) 0 '() load? (fx+ offset 4) - (cond - [load? `(seq ,e (set! ,(car regs) ,(%mref ,%sp ,offset)))] - [else `(seq (set! ,(%mref ,%sp ,offset) ,(car regs)) ,e)]))] - [else e]))) + [(fx> fp-reg-count 0) + ;; Push floating-point first to get correct alignment + (let ([offset (align 8 offset)]) + (move-registers regs (fx- fp-reg-count 1) (cdr fp-regs) load? (fx+ offset 8) + (cond + [load? `(seq ,e (set! ,(car fp-regs) ,(%mref ,%sp ,%zero ,offset fp)))] + [else `(seq (set! ,(%mref ,%sp ,%zero ,offset fp) ,(car fp-regs)) ,e)])))] + [(pair? regs) + (move-registers (cdr regs) 0 '() load? (fx+ offset 4) + (cond + [load? `(seq ,e (set! ,(car regs) ,(%mref ,%sp ,offset)))] + [else `(seq (set! ,(%mref ,%sp ,offset) ,(car regs)) ,e)]))] + [else e]))) ;; Add "pushes" before e (define (push-registers regs fp-reg-count fp-regs offset e) - (move-registers regs fp-reg-count fp-regs #f offset e)) + (move-registers regs fp-reg-count fp-regs #f offset e)) ;; Add "pops" after e (define (pop-registers regs fp-reg-count fp-regs offset e) (move-registers regs fp-reg-count fp-regs #t offset e))) @@ -2225,17 +2225,17 @@ (define load-indirect-int-stack (lambda (offset size) (lambda (rhs) ; requires rhs - (let ([int-type (case size - [(1) 'integer-8] - [(2) 'integer-16] - [else 'integer-32])]) + (let ([int-type (case size + [(1) 'integer-8] + [(2) 'integer-16] + [else 'integer-32])]) `(set! ,(%mref ,%sp ,offset) (inline ,(make-info-load int-type #f) ,%load ,rhs ,%zero (immediate ,0))))))) (define load-indirect-int64-stack (lambda (offset) (lambda (x) ; requires var `(seq - (set! ,(%mref ,%sp ,offset) ,(%mref ,x 0)) - (set! ,(%mref ,%sp ,(fx+ offset 4)) ,(%mref ,x 4)))))) + (set! ,(%mref ,%sp ,offset) ,(%mref ,x 0)) + (set! ,(%mref ,%sp ,(fx+ offset 4)) ,(%mref ,x 4)))))) (define load-double-reg (lambda (fpreg fp-disp) (if fp-disp @@ -2332,8 +2332,8 @@ (lambda (loreg hireg) (lambda (x) ; requires var `(seq - (set! ,hireg ,(%mref ,x 0)) - (set! ,loreg ,(%mref ,x 4)))))) + (set! ,hireg ,(%mref ,x 0)) + (set! ,loreg ,(%mref ,x 4)))))) (define load-indirect-int64-reg+stack (lambda (hi offset) (lambda (rhs) ; requires var @@ -2500,7 +2500,7 @@ ;; floating-point in a union is passed in integer registers: (and ($ftd-union? ftd) (eq? 'float (caar members)))) - ;; compound: use integer registers until we run out; + ;; compound: use integer registers until we run out; ;; for simplicity, just put the whole argument (not just ;; the part after registers) on the stack, too, which ;; handles things like sizes not divisible by 4 or unions @@ -2721,11 +2721,11 @@ (cons (car int*) live*) (cdr int*) flt* isp fp-live-count #f)))])))))]) (define (plan-result result-type fill-result-here? fill-stash-offset) - (if (constant software-floating-point) + (if (constant software-floating-point) (let () (define handle-64-bit (lambda () - (values (reg-list %Cretval-high %Cretval-low) 0 (lambda (e) e)))) + (values (reg-list %Cretval-high %Cretval-low) 0 (lambda (e) e)))) (define handle-32-bit (lambda () (values (reg-list %Cretval) 0 (lambda (e) e)))) @@ -2735,25 +2735,25 @@ [(8 16 32) (handle-32-bit)] [(64) (handle-64-bit)] [else (sorry! who "unexpected asm-foreign-procedures fp-integer size ~s" bits)]))) - (define (handle-ftd&-case ftd) - (cond - [fill-result-here? - (let-values ([(result-live* result-fp-live-count make) (if (> ($ftd-size ftd) 4) - (handle-64-bit) - (handle-32-bit))]) - (values result-live* result-fp-live-count - (lambda (e) - (%seq - ,(make e) - ,(do-indirect-result-from-registers ftd fill-stash-offset)))))] - [else (values (reg-list) 0 (lambda (e) e))])) + (define (handle-ftd&-case ftd) + (cond + [fill-result-here? + (let-values ([(result-live* result-fp-live-count make) (if (> ($ftd-size ftd) 4) + (handle-64-bit) + (handle-32-bit))]) + (values result-live* result-fp-live-count + (lambda (e) + (%seq + ,(make e) + ,(do-indirect-result-from-registers ftd fill-stash-offset)))))] + [else (values (reg-list) 0 (lambda (e) e))])) (nanopass-case (Ltype Type) result-type - [(fp-double-float) (handle-64-bit)] - [(fp-single-float) (handle-32-bit)] - [(fp-integer ,bits) (handle-integer-cases bits)] - [(fp-integer ,bits) (handle-integer-cases bits)] - [(fp-ftd& ,ftd) (handle-ftd&-case ftd)] - [else (values (reg-list %Cretval) 0 (lambda (e) e))])) + [(fp-double-float) (handle-64-bit)] + [(fp-single-float) (handle-32-bit)] + [(fp-integer ,bits) (handle-integer-cases bits)] + [(fp-integer ,bits) (handle-integer-cases bits)] + [(fp-ftd& ,ftd) (handle-ftd&-case ftd)] + [else (values (reg-list %Cretval) 0 (lambda (e) e))])) (let () (define handle-integer-cases (lambda (bits) @@ -2761,72 +2761,72 @@ [(8 16 32) (values (reg-list %Cretval) 0 (lambda (e) e))] [(64) (values (reg-list %Cretval-high %Cretval-low) 0 (lambda (e) e))] [else (sorry! who "unexpected asm-foreign-procedures fp-integer size ~s" bits)]))) - (define (handle-ftd&-case ftd) - (cond - [fill-result-here? - (let-values ([(result-live* result-fp-live-count make) - (if (not (eq? 'float ($ftd-atomic-category ftd))) - (handle-integer-cases (* 8 ($ftd-size ftd))) - (values (reg-list) 1 (lambda (e) e)))]) - (values - result-live* - result-fp-live-count - (lambda (e) - (%seq - ,(make e) - ,(do-indirect-result-from-registers ftd fill-stash-offset)))))] - [else (values (reg-list) 0 (lambda (e) e))])) + (define (handle-ftd&-case ftd) + (cond + [fill-result-here? + (let-values ([(result-live* result-fp-live-count make) + (if (not (eq? 'float ($ftd-atomic-category ftd))) + (handle-integer-cases (* 8 ($ftd-size ftd))) + (values (reg-list) 1 (lambda (e) e)))]) + (values + result-live* + result-fp-live-count + (lambda (e) + (%seq + ,(make e) + ,(do-indirect-result-from-registers ftd fill-stash-offset)))))] + [else (values (reg-list) 0 (lambda (e) e))])) (nanopass-case (Ltype Type) result-type - [(fp-double-float) (values (reg-list) 1 (lambda (e) e))] - [(fp-single-float) (values (reg-list) 1 (lambda (e) e))] - [(fp-integer ,bits) (handle-integer-cases bits)] - [(fp-unsigned ,bits) (handle-integer-cases bits)] - [(fp-ftd& ,ftd) (handle-ftd&-case ftd)] - [else (values (reg-list %Cretval) 0 (lambda (e) e))])))) - (define do-indirect-result-from-registers - (lambda (ftd offset) - (let ([tmp %Carg8]) - (%seq - (set! ,tmp ,(%mref ,%sp ,offset)) - ,(cond - [(and (not (constant software-floating-point)) - (eq? 'float ($ftd-atomic-category ftd))) + [(fp-double-float) (values (reg-list) 1 (lambda (e) e))] + [(fp-single-float) (values (reg-list) 1 (lambda (e) e))] + [(fp-integer ,bits) (handle-integer-cases bits)] + [(fp-unsigned ,bits) (handle-integer-cases bits)] + [(fp-ftd& ,ftd) (handle-ftd&-case ftd)] + [else (values (reg-list %Cretval) 0 (lambda (e) e))])))) + (define do-indirect-result-from-registers + (lambda (ftd offset) + (let ([tmp %Carg8]) + (%seq + (set! ,tmp ,(%mref ,%sp ,offset)) + ,(cond + [(and (not (constant software-floating-point)) + (eq? 'float ($ftd-atomic-category ftd))) (if (= 4 ($ftd-size ftd)) (%inline store-double->single ,(%mref ,tmp ,%zero 0 fp) ,%Cfpretval) `(set! ,(%mref ,tmp ,%zero 0 fp) ,%Cfpretval))] - [else - (case ($ftd-size ftd) - [(1) `(inline ,(make-info-load 'integer-8 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval)] - [(2) `(inline ,(make-info-load 'integer-16 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval)] - [(4) `(inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval)] - [(8) - (%seq - (inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval-high) - (inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 4) ,%Cretval-low))] - [else (sorry! who "unexpected result size")])]))))) - (define (add-deactivate t0 offset live* fp-live-count result-live* result-fp-live-count e) - (let ([save-and-restore - (lambda (regs fp-count fp-regs e) - (cond - [(and (null? regs) (fx= 0 fp-count)) e] - [else - (pop-registers regs fp-count fp-regs offset - (push-registers regs fp-count fp-regs offset - e))]))]) - (%seq - (set! ,%deact ,t0) - ,(save-and-restore (cons %deact live*) fp-live-count (fp-parameter-regs) (%inline deactivate-thread)) - ,e - ,(save-and-restore result-live* result-fp-live-count (fp-result-regs) `(set! ,%Cretval ,(%inline activate-thread)))))) + [else + (case ($ftd-size ftd) + [(1) `(inline ,(make-info-load 'integer-8 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval)] + [(2) `(inline ,(make-info-load 'integer-16 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval)] + [(4) `(inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval)] + [(8) + (%seq + (inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 0) ,%Cretval-high) + (inline ,(make-info-load 'integer-32 #f) ,%store ,tmp ,%zero (immediate 4) ,%Cretval-low))] + [else (sorry! who "unexpected result size")])]))))) + (define (add-deactivate t0 offset live* fp-live-count result-live* result-fp-live-count e) + (let ([save-and-restore + (lambda (regs fp-count fp-regs e) + (cond + [(and (null? regs) (fx= 0 fp-count)) e] + [else + (pop-registers regs fp-count fp-regs offset + (push-registers regs fp-count fp-regs offset + e))]))]) + (%seq + (set! ,%deact ,t0) + ,(save-and-restore (cons %deact live*) fp-live-count (fp-parameter-regs) (%inline deactivate-thread)) + ,e + ,(save-and-restore result-live* result-fp-live-count (fp-result-regs) `(set! ,%Cretval ,(%inline activate-thread)))))) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore (let* ([varargs? (not (memq 'atomic (info-foreign-conv* info)))] ; pessimistic for Mac OS [really-varargs? (ormap (lambda (conv) (and (pair? conv) (eq? 'varargs (car conv)))) (info-foreign-conv* info))] [arg-type* (info-foreign-arg-type* info)] - [result-type (info-foreign-result-type info)] - [fill-result-here? (indirect-result-that-fits-in-registers? result-type)] - [adjust-active? (if-feature pthreads (memq 'adjust-active (info-foreign-conv* info)) #f)]) + [result-type (info-foreign-result-type info)] + [fill-result-here? (indirect-result-that-fits-in-registers? result-type)] + [adjust-active? (if-feature pthreads (memq 'adjust-active (info-foreign-conv* info)) #f)]) (with-values (do-args (if fill-result-here? (cdr arg-type*) (indirect-result-to-pointer result-type arg-type*)) varargs?) (lambda (orig-frame-size locs live* fp-live-count) @@ -2834,41 +2834,41 @@ (let-values ([(result-live* result-fp-live-count make-call) (plan-result result-type fill-result-here? fill-stash-offset)]) (let* ([base-frame-size (fx+ orig-frame-size (if fill-result-here? 4 0))] - [deactivate-save-offset (if (and adjust-active? + [deactivate-save-offset (if (and adjust-active? (or (fx> fp-live-count 0) (fx> result-fp-live-count 0))) - (align 8 base-frame-size) ; for `double` save - base-frame-size)] - [frame-size (align 16 (if adjust-active? - (fx+ deactivate-save-offset - (fx* (fxmax fp-live-count result-fp-live-count) 8) - (fx* (fxmax (add1 (length live*)) (length result-live*)) 4)) - deactivate-save-offset))]) + (align 8 base-frame-size) ; for `double` save + base-frame-size)] + [frame-size (align 16 (if adjust-active? + (fx+ deactivate-save-offset + (fx* (fxmax fp-live-count result-fp-live-count) 8) + (fx* (fxmax (add1 (length live*)) (length result-live*)) 4)) + deactivate-save-offset))]) (values (lambda () (%inline store-with-update ,%Csp ,%Csp (immediate ,(fx- frame-size)))) (let ([locs (reverse locs)]) - (cond - [fill-result-here? - ;; stash extra argument on the stack to be retrieved after call and filled with the result: - (cons (load-int-stack fill-stash-offset) locs)] - [else locs])) + (cond + [fill-result-here? + ;; stash extra argument on the stack to be retrieved after call and filled with the result: + (cons (load-int-stack fill-stash-offset) locs)] + [else locs])) (lambda (t0 not-varargs?) (define (add-crset e) (constant-case machine-type-name - [(ppc32osx tppc32osx) e] - [else - (if (and really-varargs? (not (fx= 0 fp-live-count))) - `(seq - ,(%inline set-cr-bit (immediate 6)) - ,e) - e)])) + [(ppc32osx tppc32osx) e] + [else + (if (and really-varargs? (not (fx= 0 fp-live-count))) + `(seq + ,(%inline set-cr-bit (immediate 6)) + ,e) + e)])) (let ([kill* (add-caller-save-registers result-live*)]) (make-call - (cond - [adjust-active? - (add-deactivate t0 deactivate-save-offset live* fp-live-count result-live* result-fp-live-count - (add-crset `(inline ,(make-info-kill*-live* kill* live*) ,%c-call ,%deact)))] - [else (add-crset `(inline ,(make-info-kill*-live* kill* live*) ,%c-call ,t0))])))) + (cond + [adjust-active? + (add-deactivate t0 deactivate-save-offset live* fp-live-count result-live* result-fp-live-count + (add-crset `(inline ,(make-info-kill*-live* kill* live*) ,%c-call ,%deact)))] + [else (add-crset `(inline ,(make-info-kill*-live* kill* live*) ,%c-call ,t0))])))) (nanopass-case (Ltype Type) result-type [(fp-double-float) (lambda (lvalue) ; unboxed @@ -2920,7 +2920,7 @@ sp+n+4: | | +---------------------------+ | | - f's frame | back chain | 1 word + f's frame | back chain | 1 word sp+n: | | <--------------------------------+ +---------------------------+ | +---------------------------+ | @@ -2991,7 +2991,7 @@ | | +---------------------------+ | | - | floating-point arg regs | + | floating-point arg regs | | | +---------------------------+ <- 8-byte aligned | | @@ -3080,16 +3080,16 @@ (define load-stack-address (lambda (offset) (lambda (lvalue) - `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) + `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) (define load-stack-address/convert-float (lambda (offset) (lambda (lvalue) - (%seq - ;; Overwrite argument on stack with single-precision version - ;; FIXME: is the callee allowed to do this if the argument is passed on the stack? - (set! ,%fptmp1 ,(%mref ,%sp ,%zero ,offset fp)) + (%seq + ;; Overwrite argument on stack with single-precision version + ;; FIXME: is the callee allowed to do this if the argument is passed on the stack? + (set! ,%fptmp1 ,(%mref ,%sp ,%zero ,offset fp)) ,(%inline store-double->single ,(%mref ,%sp ,%zero ,offset fp) ,%fptmp1) - (set! ,lvalue ,(%inline + ,%sp (immediate ,offset))))))) + (set! ,lvalue ,(%inline + ,%sp (immediate ,offset))))))) (constant-case machine-type-name [(ppc32osx tppc32osx) (define register+stack-arguments-starting-offset @@ -3318,108 +3318,108 @@ locs) locs)) (cond - [(and (not (constant software-floating-point)) - (nanopass-case (Ltype Type) (car types) - [(fp-double-float) #t] - [(fp-single-float) #t] - [else #f])) - (if (fx< iflt fp-reg-count) - (loop (cdr types) - (cons (load-double-stack float-reg-offset) locs) - iint (fx+ iflt 1) int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset) - (let ([stack-arg-offset (align 8 stack-arg-offset)]) - (loop (cdr types) - (cons (load-double-stack stack-arg-offset) locs) - iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8))))] - [(and (constant software-floating-point) - (nanopass-case (Ltype Type) (car types) - [(fp-double-float) #t] - [else #f])) - (let ([iint (align 2 iint)]) - (if (fx< iint gp-reg-count) - (let ([int-reg-offset (align 8 int-reg-offset)]) - (loop (cdr types) - (cons (load-double-stack int-reg-offset) locs) - (fx+ iint 2) iflt (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)) - (let ([stack-arg-offset (align 8 stack-arg-offset)]) - (loop (cdr types) - (cons (load-double-stack stack-arg-offset) locs) - iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))))] - [(and (constant software-floating-point) - (nanopass-case (Ltype Type) (car types) - [(fp-single-float) #t] - [else #f])) - (if (fx< iint gp-reg-count) - (loop (cdr types) - (cons (load-soft-single-stack int-reg-offset) locs) - (fx+ iint 1) iflt (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset) - (loop (cdr types) - (cons (load-soft-single-stack stack-arg-offset) locs) - iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 4)))] - [(nanopass-case (Ltype Type) (car types) - [(fp-ftd& ,ftd) (not ($ftd-compound? ftd))] - [else #f]) - ;; load pointer to address on the stack - (let ([ftd (nanopass-case (Ltype Type) (car types) - [(fp-ftd& ,ftd) ftd])]) - (case (and (not (constant software-floating-point)) - ($ftd-atomic-category ftd)) - [(float) - (let ([load-address (case ($ftd-size ftd) - [(4) load-stack-address/convert-float] - [else load-stack-address])]) - (if (fx< iflt fp-reg-count) - (loop (cdr types) - (cons (load-address float-reg-offset) locs) - iint (fx+ iflt 1) int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset) - (let ([stack-arg-offset (align 8 stack-arg-offset)]) + [(and (not (constant software-floating-point)) + (nanopass-case (Ltype Type) (car types) + [(fp-double-float) #t] + [(fp-single-float) #t] + [else #f])) + (if (fx< iflt fp-reg-count) + (loop (cdr types) + (cons (load-double-stack float-reg-offset) locs) + iint (fx+ iflt 1) int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset) + (let ([stack-arg-offset (align 8 stack-arg-offset)]) + (loop (cdr types) + (cons (load-double-stack stack-arg-offset) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8))))] + [(and (constant software-floating-point) + (nanopass-case (Ltype Type) (car types) + [(fp-double-float) #t] + [else #f])) + (let ([iint (align 2 iint)]) + (if (fx< iint gp-reg-count) + (let ([int-reg-offset (align 8 int-reg-offset)]) + (loop (cdr types) + (cons (load-double-stack int-reg-offset) locs) + (fx+ iint 2) iflt (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)) + (let ([stack-arg-offset (align 8 stack-arg-offset)]) + (loop (cdr types) + (cons (load-double-stack stack-arg-offset) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))))] + [(and (constant software-floating-point) + (nanopass-case (Ltype Type) (car types) + [(fp-single-float) #t] + [else #f])) + (if (fx< iint gp-reg-count) + (loop (cdr types) + (cons (load-soft-single-stack int-reg-offset) locs) + (fx+ iint 1) iflt (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset) + (loop (cdr types) + (cons (load-soft-single-stack stack-arg-offset) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 4)))] + [(nanopass-case (Ltype Type) (car types) + [(fp-ftd& ,ftd) (not ($ftd-compound? ftd))] + [else #f]) + ;; load pointer to address on the stack + (let ([ftd (nanopass-case (Ltype Type) (car types) + [(fp-ftd& ,ftd) ftd])]) + (case (and (not (constant software-floating-point)) + ($ftd-atomic-category ftd)) + [(float) + (let ([load-address (case ($ftd-size ftd) + [(4) load-stack-address/convert-float] + [else load-stack-address])]) + (if (fx< iflt fp-reg-count) + (loop (cdr types) + (cons (load-address float-reg-offset) locs) + iint (fx+ iflt 1) int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset) + (let ([stack-arg-offset (align 8 stack-arg-offset)]) + (loop (cdr types) + (cons (load-address stack-arg-offset) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))))] + [else + (case ($ftd-size ftd) + [(8) + (let ([iint (align 2 iint)]) + (if (fx< iint gp-reg-count) + (let ([int-reg-offset (align 8 int-reg-offset)]) + (loop (cdr types) + (cons (load-stack-address int-reg-offset) locs) + (fx+ iint 2) iflt (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)) + (let ([stack-arg-offset (align 8 stack-arg-offset)]) + (loop (cdr types) + (cons (load-stack-address stack-arg-offset) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))))] + [else + (let ([byte-offset (- 4 ($ftd-size ftd))]) + (if (fx< iint gp-reg-count) (loop (cdr types) - (cons (load-address stack-arg-offset) locs) - iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))))] - [else - (case ($ftd-size ftd) - [(8) - (let ([iint (align 2 iint)]) - (if (fx< iint gp-reg-count) - (let ([int-reg-offset (align 8 int-reg-offset)]) - (loop (cdr types) - (cons (load-stack-address int-reg-offset) locs) - (fx+ iint 2) iflt (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)) - (let ([stack-arg-offset (align 8 stack-arg-offset)]) - (loop (cdr types) - (cons (load-stack-address stack-arg-offset) locs) - iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))))] - [else - (let ([byte-offset (- 4 ($ftd-size ftd))]) - (if (fx< iint gp-reg-count) - (loop (cdr types) - (cons (load-stack-address (+ int-reg-offset byte-offset)) locs) - (fx+ iint 1) iflt (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset) - (loop (cdr types) - (cons (load-stack-address (+ stack-arg-offset byte-offset)) locs) - iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 4))))])]))] - [(nanopass-case (Ltype Type) (car types) - [(fp-integer ,bits) (fx= bits 64)] - [(fp-unsigned ,bits) (fx= bits 64)] - [else #f]) - (let ([iint (align 2 iint)]) - (if (fx< iint gp-reg-count) - (let ([int-reg-offset (align 8 int-reg-offset)]) - (loop (cdr types) - (cons (load-int64-stack int-reg-offset) locs) - (fx+ iint 2) iflt (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)) - (let ([stack-arg-offset (align 8 stack-arg-offset)]) - (loop (cdr types) - (cons (load-int64-stack stack-arg-offset) locs) - iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))))] - [else - (if (fx< iint gp-reg-count) - (loop (cdr types) - (cons (load-int-stack (car types) int-reg-offset) locs) - (fx+ iint 1) iflt (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset) - (loop (cdr types) - (cons (load-int-stack (car types) stack-arg-offset) locs) - iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 4)))]))))) + (cons (load-stack-address (+ int-reg-offset byte-offset)) locs) + (fx+ iint 1) iflt (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset) + (loop (cdr types) + (cons (load-stack-address (+ stack-arg-offset byte-offset)) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 4))))])]))] + [(nanopass-case (Ltype Type) (car types) + [(fp-integer ,bits) (fx= bits 64)] + [(fp-unsigned ,bits) (fx= bits 64)] + [else #f]) + (let ([iint (align 2 iint)]) + (if (fx< iint gp-reg-count) + (let ([int-reg-offset (align 8 int-reg-offset)]) + (loop (cdr types) + (cons (load-int64-stack int-reg-offset) locs) + (fx+ iint 2) iflt (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)) + (let ([stack-arg-offset (align 8 stack-arg-offset)]) + (loop (cdr types) + (cons (load-int64-stack stack-arg-offset) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))))] + [else + (if (fx< iint gp-reg-count) + (loop (cdr types) + (cons (load-int-stack (car types) int-reg-offset) locs) + (fx+ iint 1) iflt (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset) + (loop (cdr types) + (cons (load-int-stack (car types) stack-arg-offset) locs) + iint iflt int-reg-offset float-reg-offset (fx+ stack-arg-offset 4)))]))))) (define count-reg-args (lambda (types gp-reg-count fp-reg-count synthesize-first-argument?) (let f ([types types] [iint (if synthesize-first-argument? -1 0)] [iflt 0]) @@ -3489,75 +3489,75 @@ (define do-result (lambda (result-type return-space-offset int-reg-offset) (nanopass-case (Ltype Type) result-type - [(fp-ftd& ,ftd) - (case ($ftd-atomic-category ftd) - [(float) - (values - (lambda () - (case ($ftd-size ftd) - [(4) `(set! ,%Cfpretval ,(%inline load-single->double ,(%mref ,%sp ,%zero ,return-space-offset fp)))] - [else `(set! ,%Cfpretval ,(%mref ,%sp ,%zero ,return-space-offset fp))])) - '() - 1)] - [else - (cond - [($ftd-compound? ftd) - ;; return pointer - (values - (lambda () `(set! ,%Cretval ,(%mref ,%sp ,int-reg-offset))) - (list %Cretval) - 0)] - [(fx= 8 ($ftd-size ftd)) - (values (lambda () - (%seq - (set! ,%Cretval-high ,(%mref ,%sp ,return-space-offset)) - (set! ,%Cretval-low ,(%mref ,%sp ,(fx+ return-space-offset 4))))) - (list %Cretval-high %Cretval-low) - 0)] - [else - (values - (lambda () - (case ($ftd-size ftd) - [(1) + [(fp-ftd& ,ftd) + (case ($ftd-atomic-category ftd) + [(float) + (values + (lambda () + (case ($ftd-size ftd) + [(4) `(set! ,%Cfpretval ,(%inline load-single->double ,(%mref ,%sp ,%zero ,return-space-offset fp)))] + [else `(set! ,%Cfpretval ,(%mref ,%sp ,%zero ,return-space-offset fp))])) + '() + 1)] + [else + (cond + [($ftd-compound? ftd) + ;; return pointer + (values + (lambda () `(set! ,%Cretval ,(%mref ,%sp ,int-reg-offset))) + (list %Cretval) + 0)] + [(fx= 8 ($ftd-size ftd)) + (values (lambda () + (%seq + (set! ,%Cretval-high ,(%mref ,%sp ,return-space-offset)) + (set! ,%Cretval-low ,(%mref ,%sp ,(fx+ return-space-offset 4))))) + (list %Cretval-high %Cretval-low) + 0)] + [else + (values + (lambda () + (case ($ftd-size ftd) + [(1) (let ([type (if ($ftd-unsigned? ftd) 'unsigned-8 'integer-8)]) `(set! ,%Cretval (inline ,(make-info-load type #f) ,%load ,%sp ,%zero (immediate ,return-space-offset))))] - [(2) + [(2) (let ([type (if ($ftd-unsigned? ftd) 'unsigned-16 'integer-16)]) `(set! ,%Cretval (inline ,(make-info-load type #f) ,%load ,%sp ,%zero (immediate ,return-space-offset))))] - [else `(set! ,%Cretval ,(%mref ,%sp ,return-space-offset))])) - (list %Cretval) - 0)])])] + [else `(set! ,%Cretval ,(%mref ,%sp ,return-space-offset))])) + (list %Cretval) + 0)])])] [(fp-double-float) (values (lambda (x) `(set! ,%Cfpretval ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp))) - '() - 1)] + '() + 1)] [(fp-single-float) (values (lambda (x) `(set! ,%Cfpretval ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp))) - '() - 1)] + '() + 1)] [(fp-void) (values (lambda () `(nop)) '() - 0)] - [else - (cond - [(nanopass-case (Ltype Type) result-type - [(fp-integer ,bits) (fx= bits 64)] - [(fp-unsigned ,bits) (fx= bits 64)] - [else #f]) - (values (lambda (lo-rhs hi-rhs) - (%seq - (set! ,%Cretval-low ,lo-rhs) - (set! ,%Cretval-high ,hi-rhs))) - (list %Cretval-high %Cretval-low) - 0)] - [else - (values (lambda (rhs) - `(set! ,%Cretval ,rhs)) - (list %Cretval) - 0)])]))) + 0)] + [else + (cond + [(nanopass-case (Ltype Type) result-type + [(fp-integer ,bits) (fx= bits 64)] + [(fp-unsigned ,bits) (fx= bits 64)] + [else #f]) + (values (lambda (lo-rhs hi-rhs) + (%seq + (set! ,%Cretval-low ,lo-rhs) + (set! ,%Cretval-high ,hi-rhs))) + (list %Cretval-high %Cretval-low) + 0)] + [else + (values (lambda (rhs) + `(set! ,%Cretval ,rhs)) + (list %Cretval) + 0)])]))) (define result-regs-bytes (lambda (result-type) (let-values ([(get-result result-regs result-num-fp-regs) @@ -3567,18 +3567,18 @@ (fx* result-num-fp-regs 8))))) (define (unactivate unactivate-mode-offset result-regs result-num-fp-regs stash-offset) (let ([e (%seq - (set! ,%Carg1 ,(%mref ,%sp ,unactivate-mode-offset)) - ,(%inline unactivate-thread ,%Carg1))]) - (pop-registers result-regs result-num-fp-regs (fp-result-regs) stash-offset - (push-registers result-regs result-num-fp-regs (fp-result-regs) stash-offset - e)))) + (set! ,%Carg1 ,(%mref ,%sp ,unactivate-mode-offset)) + ,(%inline unactivate-thread ,%Carg1))]) + (pop-registers result-regs result-num-fp-regs (fp-result-regs) stash-offset + (push-registers result-regs result-num-fp-regs (fp-result-regs) stash-offset + e)))) (lambda (info) (define callee-save-regs (list %r14 %r15 %r16 %r17 %r18 %r19 %r20 %r21 %r22 %r23 %r24 %r25 %r26 %r27 %r28 %r29 %r30 %r31)) (define callee-save-fp-regs (list %fpreg1 %fpreg2)) (define isaved (length callee-save-regs)) (define fpsaved (length callee-save-fp-regs)) (let ([arg-type* (info-foreign-arg-type* info)] - [result-type (info-foreign-result-type info)] + [result-type (info-foreign-result-type info)] [gp-reg-count (length (gp-parameter-regs))] [fp-reg-count (length (fp-parameter-regs))] [adjust-active? (if-feature pthreads (memq 'adjust-active (info-foreign-conv* info)) #f)]) @@ -3593,58 +3593,58 @@ float-reg-offset (fx+ (fx* fp-reg-count 8) float-reg-offset))] [callee-save-fp-offset (fx+ (fx* isaved 4) callee-save-offset)] - [synthesize-first-argument? (indirect-result-that-fits-in-registers? result-type)] - [varargs-after (ormap (lambda (conv) (and (pair? conv) (eq? 'varargs (car conv)) (cdr conv))) + [synthesize-first-argument? (indirect-result-that-fits-in-registers? result-type)] + [varargs-after (ormap (lambda (conv) (and (pair? conv) (eq? 'varargs (car conv)) (cdr conv))) (info-foreign-conv* info))] [unactivate-mode-offset (fx+ (fx* fpsaved 8) callee-save-fp-offset)] [return-space-offset (align 8 (fx+ unactivate-mode-offset (if adjust-active? 4 0)))] [stack-size (align 16 (fx+ return-space-offset (if synthesize-first-argument? 8 0)))] [stack-arg-offset (fx+ stack-size stack-arguments-starting-offset)]) - (let-values ([(get-result result-regs result-num-fp-regs) (do-result result-type return-space-offset int-reg-offset)]) - (values - (lambda () - (%seq - ,(%inline save-lr (immediate 4)) - ,(%inline store-with-update ,%Csp ,%Csp (immediate ,(fx- stack-size))) - ,(save-regs (list-head (gp-parameter-regs) iint) int-reg-offset) - ,(save-fp-regs (list-head (fp-parameter-regs) iflt) float-reg-offset) - ,(save-regs callee-save-regs callee-save-offset) - ,(save-fp-regs callee-save-fp-regs callee-save-fp-offset) - ,(if-feature pthreads - ((lambda (e) - (if adjust-active? - (%seq - (set! ,%Cretval ,(%inline activate-thread)) - (set! ,(%mref ,%sp ,unactivate-mode-offset) ,%Cretval) - ,e) - e)) - (%seq - (set! ,%Cretval ,(%inline get-tc)) - (set! ,%tc ,%Cretval))) - `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) - ; list of procedures that marshal arguments from their C stack locations - ; to the Scheme argument locations + (let-values ([(get-result result-regs result-num-fp-regs) (do-result result-type return-space-offset int-reg-offset)]) + (values + (lambda () + (%seq + ,(%inline save-lr (immediate 4)) + ,(%inline store-with-update ,%Csp ,%Csp (immediate ,(fx- stack-size))) + ,(save-regs (list-head (gp-parameter-regs) iint) int-reg-offset) + ,(save-fp-regs (list-head (fp-parameter-regs) iflt) float-reg-offset) + ,(save-regs callee-save-regs callee-save-offset) + ,(save-fp-regs callee-save-fp-regs callee-save-fp-offset) + ,(if-feature pthreads + ((lambda (e) + (if adjust-active? + (%seq + (set! ,%Cretval ,(%inline activate-thread)) + (set! ,(%mref ,%sp ,unactivate-mode-offset) ,%Cretval) + ,e) + e)) + (%seq + (set! ,%Cretval ,(%inline get-tc)) + (set! ,%tc ,%Cretval))) + `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) + ; list of procedures that marshal arguments from their C stack locations + ; to the Scheme argument locations (do-stack (indirect-result-to-pointer result-type arg-type*) gp-reg-count fp-reg-count int-reg-offset float-reg-offset stack-arg-offset - synthesize-first-argument? varargs-after return-space-offset) - get-result - (lambda () - (in-context Tail - ((lambda (e) - (if adjust-active? - (%seq - ,(unactivate unactivate-mode-offset result-regs result-num-fp-regs int-reg-offset) - ,e) - e)) - (%seq - ; restore the lr - (inline ,null-info ,%restore-lr (immediate ,(fx+ stack-size 4))) - ; restore the callee save registers - ,(restore-regs callee-save-regs callee-save-offset) - ,(restore-fp-regs callee-save-fp-regs callee-save-fp-offset) - ; deallocate space for pad & arg reg values - (set! ,%Csp ,(%inline + ,%Csp (immediate ,stack-size))) - ; done - (asm-c-return ,null-info ,callee-save-regs ... ,callee-save-fp-regs ... - ,result-regs ... ,(list-head (fp-result-regs) result-num-fp-regs) ...))))))))))))))) + synthesize-first-argument? varargs-after return-space-offset) + get-result + (lambda () + (in-context Tail + ((lambda (e) + (if adjust-active? + (%seq + ,(unactivate unactivate-mode-offset result-regs result-num-fp-regs int-reg-offset) + ,e) + e)) + (%seq + ; restore the lr + (inline ,null-info ,%restore-lr (immediate ,(fx+ stack-size 4))) + ; restore the callee save registers + ,(restore-regs callee-save-regs callee-save-offset) + ,(restore-fp-regs callee-save-fp-regs callee-save-fp-offset) + ; deallocate space for pad & arg reg values + (set! ,%Csp ,(%inline + ,%Csp (immediate ,stack-size))) + ; done + (asm-c-return ,null-info ,callee-save-regs ... ,callee-save-fp-regs ... + ,result-regs ... ,(list-head (fp-result-regs) result-num-fp-regs) ...))))))))))))))) ) diff --git a/s/riscv64.ss b/s/riscv64.ss index 0d8f875dd..965358bd3 100644 --- a/s/riscv64.ss +++ b/s/riscv64.ss @@ -387,7 +387,7 @@ (define-instruction value (fpcastto) [(op (x mem) (y fpur)) `(set! ,(make-live-info) ,(mem->mem x 'fp) ,y)] [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcastto ,y))]) - + (define-instruction value (fpcastfrom) [(op (x fpmem) (y ur)) `(set! ,(make-live-info) ,(mem->mem x 'uptr) ,y)] [(op (x fpur) (y ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcastfrom ,y))])) @@ -1413,7 +1413,7 @@ ;; dest can be an mref, and then the offset is double-aligned (lambda (code* dest src) (gen-fpmove who code* dest src #t))) - + (define-who asm-fpmove-single (lambda (code* dest src) (gen-fpmove who code* dest src #f))) @@ -1465,7 +1465,7 @@ (lambda (code* dest src) (Trivit (dest src) (emit fmov.d.x dest src '() code*)))) - + ;; flonum to fixnum (define-who asm-fptrunc (lambda (code* dest src) diff --git a/s/x86.ss b/s/x86.ss index 8147b5c87..fcebf3b21 100644 --- a/s/x86.ss +++ b/s/x86.ss @@ -894,7 +894,7 @@ (define-op extad byte-op #b10011001) ; extend eax to edx (define-op int3 byte-op #b11001100) - + (define-op rdtsc two-byte-op #b1111 #b00110001) ; read time-stamp counter (define-op rdpmc two-byte-op #b1111 #b00110011) ; read performance monitoring counter (define-op pause two-byte-op #b11110011 #b10010000) ; equivalent to rep nop @@ -1293,7 +1293,7 @@ (emit-code (op disp code*) (build byte #b11101001) (build long offset)))] - [else + [else (emit-code (op disp code*) (build byte #b11101001) (ax-ea-branch-disp disp))]))) @@ -1447,7 +1447,7 @@ [(and (eqv? 0 size) (not (eq? base-reg %ebp))) #b00] [(ax-byte-size? size) #b01] [else #b10])] - [(literal@) stuff #b00] + [(literal@) stuff #b00] [(disp) (size reg) (cond [(and (eqv? 0 size) (not (eq? reg %ebp))) #b00] ; indirect @@ -2463,14 +2463,14 @@ [(4) (cond [(and (if-feature windows (not ($ftd-compound? ftd)) #t) - (equal? '((float 4 0)) ($ftd->members ftd))) + (equal? '((float 4 0)) ($ftd->members ftd))) `(set! ,(%mref ,%ecx ,%zero 0 fp) ,(%inline fstps))] [else `(set! ,(%mref ,%ecx 0) ,%eax)])] [(8) (cond [(and (if-feature windows (not ($ftd-compound? ftd)) #t) - (equal? '((float 8 0)) ($ftd->members ftd))) + (equal? '((float 8 0)) ($ftd->members ftd))) `(set! ,(%mref ,%ecx ,%zero 0 fp) ,(%inline fstpl))] [else `(seq @@ -2707,7 +2707,7 @@ ;; the extra 4 bytes may be used for the unactivate mode 12])] [init-stack-offset (fx+ 20 indirect-result-space)] - [indirect-result-to-registers? (fill-result-pointer-from-registers? result-type)]) + [indirect-result-to-registers? (fill-result-pointer-from-registers? result-type)]) (let-values ([(get-result result-regs result-num-fp-regs) (do-result result-type init-stack-offset indirect-result-to-registers?)]) (with-values (do-stack (if indirect-result-to-registers? diff --git a/s/x86_64.ss b/s/x86_64.ss index 1988ec575..0d876a159 100644 --- a/s/x86_64.ss +++ b/s/x86_64.ss @@ -1623,7 +1623,7 @@ (and maybe-reg (rex-required? maybe-reg))) b))) (record-case ea - [(index) (size index-reg base-reg) + [(index) (size index-reg base-reg) (build-rex (fxsrl (reg-mdinfo index-reg) 3) (fxsrl (reg-mdinfo base-reg) 3))] @@ -2485,53 +2485,53 @@ (if-feature windows ;; Windows: either passed in one register or not (define (classify-eightbytes ftd) - (cond - [($ftd-compound? ftd) - (if (memv ($ftd-size ftd) '(1 2 4 8)) - '(integer) - '(memory))] - [(eq? 'float (caar ($ftd->members ftd))) - '(sse)] - [else '(integer)])) + (cond + [($ftd-compound? ftd) + (if (memv ($ftd-size ftd) '(1 2 4 8)) + '(integer) + '(memory))] + [(eq? 'float (caar ($ftd->members ftd))) + '(sse)] + [else '(integer)])) ;; Non-Windows: SYSV ABI is a more general classification of ;; 8-byte segments into 'integer, 'sse, or 'memory modes (define (classify-eightbytes ftd) - (define (merge t1 t2) - (cond - [(eq? t1 t2) t1] - [(eq? t1 'no-class) t2] - [(eq? t2 'no-class) t1] - [(eq? t1 'memory) 'memory] - [(eq? t2 'memory) 'memory] - [else 'integer])) - (cond - [(or (> ($ftd-size ftd) 16) ; more than 2 eightbytes => passed in memory - (fx= 0 ($ftd-size ftd))) - '(memory)] - [else - (let ([classes (make-vector (fxsrl (align ($ftd-size ftd) 8) 3) 'no-class)]) - (let loop ([mbrs ($ftd->members ftd)]) - (cond - [(null? mbrs) - (vector->list classes)] - [else - (let ([kind (caar mbrs)] - [size (cadar mbrs)] - [offset (caddar mbrs)]) - (cond - [(not (fx= offset (align offset size))) - ;; misaligned - '(memory)] - [else - (let* ([pos (fxsrl offset 3)] - [class (vector-ref classes pos)] - [new-class (merge class (if (eq? kind 'float) 'sse 'integer))]) - (cond - [(eq? new-class 'memory) - '(memory)] - [else - (vector-set! classes pos new-class) - (loop (cdr mbrs))]))]))])))]))) + (define (merge t1 t2) + (cond + [(eq? t1 t2) t1] + [(eq? t1 'no-class) t2] + [(eq? t2 'no-class) t1] + [(eq? t1 'memory) 'memory] + [(eq? t2 'memory) 'memory] + [else 'integer])) + (cond + [(or (> ($ftd-size ftd) 16) ; more than 2 eightbytes => passed in memory + (fx= 0 ($ftd-size ftd))) + '(memory)] + [else + (let ([classes (make-vector (fxsrl (align ($ftd-size ftd) 8) 3) 'no-class)]) + (let loop ([mbrs ($ftd->members ftd)]) + (cond + [(null? mbrs) + (vector->list classes)] + [else + (let ([kind (caar mbrs)] + [size (cadar mbrs)] + [offset (caddar mbrs)]) + (cond + [(not (fx= offset (align offset size))) + ;; misaligned + '(memory)] + [else + (let* ([pos (fxsrl offset 3)] + [class (vector-ref classes pos)] + [new-class (merge class (if (eq? kind 'float) 'sse 'integer))]) + (cond + [(eq? new-class 'memory) + '(memory)] + [else + (vector-set! classes pos new-class) + (loop (cdr mbrs))]))]))])))]))) (define (count v l) (cond @@ -2959,7 +2959,7 @@ (returnem frame-size locs (lambda (t0 not-varargs?) (let* ([t (if adjust-active? %deact t0)] ; need a register if `adjust-active?` - [kill* (add-caller-save-registers result-reg*)] + [kill* (add-caller-save-registers result-reg*)] [c-call (add-deactivate adjust-active? t0 (append fp-live* live*) result-reg* @@ -3030,8 +3030,8 @@ incoming | incoming return address | one quad +---------------------------+ | pad word / indirect space | one quad sp+0: +---------------------------+<- 16-byte boundary - - + + Standard: Frame Layout +---------------------------+ @@ -3212,14 +3212,14 @@ incoming | incoming return address | one quad (nanopass-case (Ltype Type) (car types) [(fp-double-float) (load-double-stack isp)] [(fp-single-float) (load-single-stack isp)] - [(fp-ftd& ,ftd) - (cond - [(memq ($ftd-size ftd) '(1 2 4 8)) - ;; passed by value - (load-stack-address isp)] - [else - ;; passed by reference - (load-int-stack (car types) isp)])] + [(fp-ftd& ,ftd) + (cond + [(memq ($ftd-size ftd) '(1 2 4 8)) + ;; passed by value + (load-stack-address isp)] + [else + ;; passed by reference + (load-int-stack (car types) isp)])] [else (load-int-stack (car types) isp)]) locs) (fx+ isp 8)))) @@ -3399,7 +3399,7 @@ incoming | incoming return address | one quad (set! ,(%mref ,%sp ,active-state-offset) ,%rax) ,e) e)) - (%seq + (%seq (set! ,%rax ,(%inline get-tc)) (set! ,%tc ,%rax))) `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0))))))