1 ! Copyright (C) 2005, 2009 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors assocs alien alien.c-types arrays strings
4 cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
5 cpu.architecture kernel kernel.private math memory namespaces make
6 sequences words system layouts combinators math.order fry locals
7 compiler.constants byte-arrays io macros quotations cpu.x86.features
8 cpu.x86.features.private compiler compiler.units init
10 compiler.cfg.instructions
11 compiler.cfg.intrinsics
12 compiler.cfg.comparisons
13 compiler.cfg.stack-frame
15 compiler.codegen.fixup ;
19 << enable-fixnum-log2 >>
21 ! Add some methods to the assembler to be more useful to the backend
! JMP to a label: emit JMP with a 0 placeholder target, then record an
! rc-relative label fixup for the label.
22 M: label JMP 0 JMP rc-relative label-fixup ;
! Conditional jump to a label: the 0 placeholder is slipped underneath the
! top stack item before calling JUMPcc, then the same rc-relative label
! fixup is recorded.
23 M: label JUMPcc [ 0 ] dip JUMPcc rc-relative label-fixup ;
! The x86 backend answers t to two-operand?.
25 M: x86 two-operand? t ;
! Per-CPU hook: the register that stack@ uses as its base.
27 HOOK: stack-reg cpu ( -- reg )
! Per-CPU hook: a byte count that param@ adds to every offset.
29 HOOK: reserved-area-size cpu ( -- n )
! Memory operand [stack-reg + n].
31 : stack@ ( n -- op ) stack-reg swap [+] ;
! Like stack@, with the offset first biased by reserved-area-size.
33 : param@ ( n -- op ) reserved-area-size + stack@ ;
! Memory operand for a spill slot: n is mapped through spill-offset and
! then addressed with param@.
35 : spill@ ( n -- op ) spill-offset param@ ;
! Memory operand for a GC root: n is mapped through gc-root-offset and
! then addressed with param@.
37 : gc-root@ ( n -- op ) gc-root-offset param@ ;
: decr-stack-reg ( n -- )
    #! Emit SUB stack-reg, n. Nothing is emitted when n is 0.
    dup 0 = [ drop ] [ [ stack-reg ] dip SUB ] if ;
: incr-stack-reg ( n -- )
    #! Emit ADD stack-reg, n. Nothing is emitted when n is 0.
    dup 0 = [ drop ] [ [ stack-reg ] dip ADD ] if ;
: align-stack ( n -- n' )
    #! Round n up to a multiple of 16 when the CPU is x86.64 or the OS
    #! is Mac OS X; otherwise n is returned unchanged.
    cpu x86.64? os macosx? or
    [ 16 align ] when ;
! Frame size on x86: the generic (stack-frame-size) result plus three
! cells plus reserved-area-size, passed through align-stack.
48 M: x86 stack-frame-size ( stack-frame -- i )
49 (stack-frame-size) 3 cells reserved-area-size + + align-stack ;
51 ! Must be a volatile register not used for parameter passing, for safe
52 ! use in calls in and out of C
53 HOOK: temp-reg cpu ( -- reg )
55 ! Fastcall calling convention
! %call-gc passes its first and second arguments in these two registers.
56 HOOK: param-reg-1 cpu ( -- reg )
57 HOOK: param-reg-2 cpu ( -- reg )
! Register that %jump loads (with a rel-here relocation) immediately
! before emitting the tail-call JMP.
59 HOOK: pic-tail-reg cpu ( -- reg )
! Load an immediate into a register. A zero immediate is emitted as
! XOR reg, reg; anything else as MOV reg, imm.
61 M: x86 %load-immediate dup 0 = [ drop dup XOR ] [ MOV ] if ;
! Load an object reference: MOV reg, 0 as a placeholder, followed by an
! rc-absolute-cell rel-immediate relocation for the object.
63 M: x86 %load-reference swap 0 MOV rc-absolute-cell rel-immediate ;
! Per-CPU hook: base register for ds-loc operands; adjusted by %inc-d.
65 HOOK: ds-reg cpu ( -- reg )
! Per-CPU hook: base register for rs-loc operands; adjusted by %inc-r.
66 HOOK: rs-reg cpu ( -- reg )
: reg-stack ( n reg -- op )
    #! Memory operand addressing n cells below reg.
    [ cells neg ] dip swap [+] ;
! Convert a stack location object into a memory operand.
70 GENERIC: loc>operand ( loc -- operand )
! A ds-loc is addressed n cells below ds-reg (see reg-stack).
72 M: ds-loc loc>operand n>> ds-reg reg-stack ;
! An rs-loc is addressed n cells below rs-reg.
73 M: rs-loc loc>operand n>> rs-reg reg-stack ;
! Read a stack location: MOV register, [location].
75 M: x86 %peek loc>operand MOV ;
! Write a stack location: the swap makes the location operand the MOV
! destination and the register the source.
76 M: x86 %replace loc>operand swap MOV ;
: (%inc) ( n reg -- )
    #! Adjust reg by n cells: ADD for a positive n, SUB of the magnitude
    #! for a negative n. A zero adjustment emits no instruction at all,
    #! matching incr-stack-reg and decr-stack-reg (previously it emitted
    #! a useless SUB reg, 0).
    swap cells dup 0 = [ 2drop ] [
        dup 0 > [ ADD ] [ neg SUB ] if
    ] if ; inline
! Adjust ds-reg by n cells via (%inc).
78 M: x86 %inc-d ( n -- ) ds-reg (%inc) ;
! Adjust rs-reg by n cells via (%inc).
79 M: x86 %inc-r ( n -- ) rs-reg (%inc) ;
! Call a word: CALL with a 0 placeholder target, then an rc-relative
! rel-word-pic relocation for the word.
81 M: x86 %call ( word -- ) 0 CALL rc-relative rel-word-pic ;
83 : xt-tail-pic-offset ( -- n )
84 #! See the comment in vm/cpu-x86.hpp
! Tail call to a word. First pic-tail-reg is loaded with a placeholder
! that is relocated by rel-here using xt-tail-pic-offset; then a JMP with
! a 0 placeholder is emitted and relocated with rel-word-pic-tail.
87 M: x86 %jump ( word -- )
88 pic-tail-reg 0 MOV xt-tail-pic-offset rc-absolute-cell rel-here
89 0 JMP rc-relative rel-word-pic-tail ;
! Unconditional jump to a label: JMP with a 0 placeholder plus an
! rc-relative label fixup.
91 M: x86 %jump-label ( label -- ) 0 JMP rc-relative label-fixup ;
! Return: RET with an operand of 0.
93 M: x86 %return ( -- ) 0 RET ;
: code-alignment ( align -- n )
    #! Number of bytes needed to pad the sequence currently being built
    #! (the value of building) up to the next multiple of align.
    building get length [ swap align ] keep - ;
! Operand for a slot whose offset is held in a register. Emits
! LEA temp, [slot + obj] and returns the operand [temp - tag].
101 :: (%slot) ( obj slot tag temp -- op )
102 temp slot obj [+] LEA
103 temp tag neg [+] ; inline
! Operand for a slot with a constant index: [obj + slot cells - tag].
! No instruction is emitted.
105 :: (%slot-imm) ( obj slot tag -- op )
106 obj slot cells tag - [+] ; inline
! Slot reads: MOV dst, <slot operand>.
108 M: x86 %slot ( dst obj slot tag temp -- ) (%slot) MOV ;
109 M: x86 %slot-imm ( dst obj slot tag -- ) (%slot-imm) MOV ;
! Slot writes: the swap makes the slot operand the MOV destination and
! src the source.
110 M: x86 %set-slot ( src obj slot tag temp -- ) (%slot) swap MOV ;
111 M: x86 %set-slot-imm ( src obj slot tag -- ) (%slot-imm) swap MOV ;
! Integer arithmetic. The stack effects are declared on the generic
! words, which are not visible here; the bodies read as
! ( dst src1 src2 -- ) -- TODO confirm against cpu.architecture.
! Methods that start with nip discard the middle input and emit a
! two-operand instruction on the remaining pair.
!
! %add / %add-imm: when the first two inputs are the same object, emit
! ADD; otherwise emit LEA with a [+] operand built from the last two.
113 M: x86 %add 2over eq? [ nip ADD ] [ [+] LEA ] if ;
114 M: x86 %add-imm 2over eq? [ nip ADD ] [ [+] LEA ] if ;
115 M: x86 %sub nip SUB ;
! %sub-imm: same shape as %add-imm, with the immediate negated for LEA.
116 M: x86 %sub-imm 2over eq? [ nip SUB ] [ neg [+] LEA ] if ;
! %mul: IMUL2 receives its two operands in swapped order.
117 M: x86 %mul nip swap IMUL2 ;
! %mul-imm: all three inputs go straight to IMUL3.
118 M: x86 %mul-imm IMUL3 ;
119 M: x86 %and nip AND ;
120 M: x86 %and-imm nip AND ;
122 M: x86 %or-imm nip OR ;
123 M: x86 %xor nip XOR ;
124 M: x86 %xor-imm nip XOR ;
125 M: x86 %shl-imm nip SHL ;
126 M: x86 %shr-imm nip SHR ;
127 M: x86 %sar-imm nip SAR ;
! %min / %max: CMP followed by a conditional move on the same operand
! pair (CMOVG for min, CMOVL for max).
129 M: x86 %min nip [ CMP ] [ CMOVG ] 2bi ;
130 M: x86 %max nip [ CMP ] [ CMOVL ] 2bi ;
! %not: drops the top input and emits NOT on the one beneath it.
132 M: x86 %not drop NOT ;
! Emit a move from src to dst, dispatching on the representation to pick
! the instruction. Each method drops rep and emits the move listed.
135 GENERIC: copy-register* ( dst src rep -- )
137 M: int-rep copy-register* drop MOV ;
138 M: tagged-rep copy-register* drop MOV ;
139 M: float-rep copy-register* drop MOVSS ;
140 M: double-rep copy-register* drop MOVSD ;
141 M: float-4-rep copy-register* drop MOVUPS ;
142 M: double-2-rep copy-register* drop MOVUPD ;
143 M: vector-rep copy-register* drop MOVDQU ;
! Move src to dst using copy-register*, unless dst and src are the same
! object (eq?), in which case nothing is emitted.
145 : copy-register ( dst src rep -- )
146 2over eq? [ 3drop ] [ copy-register* ] if ;
! %copy on x86 is exactly copy-register.
148 M: x86 %copy ( dst src rep -- ) copy-register ;
150 :: overflow-template ( label dst src1 src2 insn -- )
! Overflow-checked fixnum arithmetic. Each method hands overflow-template
! a quotation that emits the arithmetic instruction.
154 M: x86 %fixnum-add ( label dst src1 src2 -- )
155 [ ADD ] overflow-template ;
157 M: x86 %fixnum-sub ( label dst src1 src2 -- )
158 [ SUB ] overflow-template ;
! For multiplication the quotation swaps its two operands before IMUL2.
160 M: x86 %fixnum-mul ( label dst src1 src2 -- )
161 [ swap IMUL2 ] overflow-template ;
! Memory operand [reg + n cells - bignum tag-number].
163 : bignum@ ( reg n -- op )
164 cells bignum tag-number - [+] ; inline
166 M:: x86 %integer>bignum ( dst src temp -- )
167 #! on entry, inreg is a signed 32-bit quantity
168 #! exits with tagged ptr to bignum in outreg
169 #! 1 cell header, 1 cell length, 1 cell sign, + digits
170 #! length is the # of digits + sign
173 ! Load cached zero value
174 dst 0 >bignum %load-reference
176 ! Is it zero? Then just go to the end and return this zero
179 dst 4 cells bignum temp %allot
181 dst 1 bignum@ 2 tag-fixnum MOV
183 dst 3 bignum@ src MOV
186 temp cell-bits 1 - SAR
189 dst 2 bignum@ temp MOV
190 ! Make negative value positive
196 dst 3 bignum@ temp MOV
200 M:: x86 %bignum>integer ( dst src temp -- )
204 temp src 1 bignum@ MOV
205 ! If the length is 1, it's just the sign and nothing else,
208 temp 1 tag-fixnum CMP
211 dst src 3 bignum@ MOV
213 temp src 2 bignum@ MOV
214 ! convert it into -1 or 1
! Scalar double-precision float arithmetic. As with the integer methods,
! nip discards the middle input and the instruction is emitted on the
! remaining pair.
223 M: x86 %add-float nip ADDSD ;
224 M: x86 %sub-float nip SUBSD ;
225 M: x86 %mul-float nip MULSD ;
226 M: x86 %div-float nip DIVSD ;
227 M: x86 %min-float nip MINSD ;
228 M: x86 %max-float nip MAXSD ;
! The remaining methods pass their inputs straight to the instruction.
229 M: x86 %sqrt SQRTSD ;
231 M: x86 %single>double-float CVTSS2SD ;
232 M: x86 %double>single-float CVTSD2SS ;
234 M: x86 %integer>float CVTSI2SD ;
235 M: x86 %float>integer CVTTSD2SI ;
! Load a float from a boxed float object: MOVSD dst, [src + float-offset].
237 M: x86 %unbox-float ( dst src -- )
238 float-offset [+] MOVSD ;
! Box a float: allocate 16 bytes of class float with %allot (temp is
! passed through as its last argument), then store src at
! [dst + float-offset] with MOVSD.
240 M:: x86 %box-float ( dst src temp -- )
241 dst 16 float temp %allot
242 dst float-offset [+] src MOVSD ;
M:: x86 %box-vector ( dst src rep temp -- )
    #! Box a SIMD value into a freshly allocated byte-array.
    #! The allocation is rep-size bytes of payload plus two cells.
    dst rep rep-size 2 cells + byte-array temp %allot
    #! Store the payload size as a tagged fixnum in slot 1 (presumably
    #! the byte-array length slot -- confirm against the VM layout).
    #! It is derived from rep so that it always agrees with the
    #! allocation size above, instead of a hard-coded 16.
    rep rep-size tag-fixnum dst 1 byte-array tag-number %set-slot-imm
    #! Copy the vector register into the data area.
    dst byte-array-offset [+]
    src rep copy-register ;
250 M:: x86 %unbox-vector ( dst src rep -- )
251 dst src byte-array-offset [+]
254 MACRO: available-reps ( alist -- )
255 ! Each SSE version adds new representations and supports
257 unzip { } [ append ] accumulate rest swap suffix
258 [ [ 1quotation ] map ] bi@ zip
259 reverse [ { } ] suffix
262 M: x86 %broadcast-vector ( dst src rep -- )
264 { float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
265 { double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
268 M: x86 %broadcast-vector-reps
270 { sse? { float-4-rep } }
271 { sse2? { double-2-rep } }
274 M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
287 M: x86 %gather-vector-4-reps
289 { sse? { float-4-rep } }
292 M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
303 M: x86 %gather-vector-2-reps
305 { sse2? { double-2-rep } }
308 M: x86 %add-vector ( dst src1 src2 rep -- )
310 { float-4-rep [ ADDPS ] }
311 { double-2-rep [ ADDPD ] }
312 { char-16-rep [ PADDB ] }
313 { uchar-16-rep [ PADDB ] }
314 { short-8-rep [ PADDW ] }
315 { ushort-8-rep [ PADDW ] }
316 { int-4-rep [ PADDD ] }
317 { uint-4-rep [ PADDD ] }
320 M: x86 %add-vector-reps
322 { sse? { float-4-rep } }
323 { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
326 M: x86 %sub-vector ( dst src1 src2 rep -- )
328 { float-4-rep [ SUBPS ] }
329 { double-2-rep [ SUBPD ] }
330 { char-16-rep [ PSUBB ] }
331 { uchar-16-rep [ PSUBB ] }
332 { short-8-rep [ PSUBW ] }
333 { ushort-8-rep [ PSUBW ] }
334 { int-4-rep [ PSUBD ] }
335 { uint-4-rep [ PSUBD ] }
338 M: x86 %sub-vector-reps
340 { sse? { float-4-rep } }
341 { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
344 M: x86 %mul-vector ( dst src1 src2 rep -- )
346 { float-4-rep [ MULPS ] }
347 { double-2-rep [ MULPD ] }
348 { short-8-rep [ PMULLW ] }
349 { ushort-8-rep [ PMULLW ] }
350 { int-4-rep [ PMULLD ] }
351 { uint-4-rep [ PMULLD ] }
354 M: x86 %mul-vector-reps
356 { sse? { float-4-rep } }
357 { sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
360 M: x86 %div-vector ( dst src1 src2 rep -- )
362 { float-4-rep [ DIVPS ] }
363 { double-2-rep [ DIVPD ] }
366 M: x86 %div-vector-reps
368 { sse? { float-4-rep } }
369 { sse2? { double-2-rep } }
372 M: x86 %min-vector ( dst src1 src2 rep -- )
374 { float-4-rep [ MINPS ] }
375 { double-2-rep [ MINPD ] }
376 { uchar-16-rep [ PMINUB ] }
377 { short-8-rep [ PMINSW ] }
380 M: x86 %min-vector-reps
382 { sse? { float-4-rep } }
383 { sse2? { double-2-rep short-8-rep uchar-16-rep } }
386 M: x86 %max-vector ( dst src1 src2 rep -- )
388 { float-4-rep [ MAXPS ] }
389 { double-2-rep [ MAXPD ] }
390 { uchar-16-rep [ PMAXUB ] }
391 { short-8-rep [ PMAXSW ] }
394 M: x86 %max-vector-reps
396 { sse? { float-4-rep } }
397 { sse2? { double-2-rep short-8-rep uchar-16-rep } }
400 M: x86 %sqrt-vector ( dst src rep -- )
402 { float-4-rep [ SQRTPS ] }
403 { double-2-rep [ SQRTPD ] }
406 M: x86 %sqrt-vector-reps
408 { sse? { float-4-rep } }
409 { sse2? { double-2-rep } }
412 M: x86 %horizontal-add-vector ( dst src rep -- )
414 { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
415 { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
418 M: x86 %horizontal-add-vector-reps
420 { sse3? { float-4-rep double-2-rep } }
! Load the address held in an alien: MOV dst, [src + alien-offset].
423 M: x86 %unbox-alien ( dst src -- )
424 alien-offset [+] MOV ;
426 M:: x86 %unbox-any-c-ptr ( dst src temp -- )
428 { "is-byte-array" "end" "start" } [ define-label ] each
431 ! We come back here with displaced aliens
432 "start" resolve-label
434 temp \ f tag-number CMP
436 ! Is the object an alien?
437 temp header-offset [+] alien type-number tag-fixnum CMP
438 "is-byte-array" get JNE
439 ! If so, load the offset and add it to the address
440 dst temp alien-offset [+] ADD
441 ! Now recurse on the underlying alien
442 temp temp underlying-alien-offset [+] MOV
444 "is-byte-array" resolve-label
445 ! Add byte array address to address being computed
447 ! Add an offset to start of byte array's data
448 dst byte-array-offset ADD
! Memory operand [reg + n cells - alien tag-number].
452 : alien@ ( reg n -- op ) cells alien tag-number - [+] ;
454 :: %allot-alien ( dst displacement base temp -- )
455 dst 4 cells alien temp %allot
456 dst 1 alien@ base MOV ! alien
457 dst 2 alien@ \ f tag-number MOV ! expired
458 dst 3 alien@ displacement MOV ! displacement
461 M:: x86 %box-alien ( dst src temp -- )
464 dst \ f tag-number MOV
467 dst src \ f tag-number temp %allot-alien
471 M:: x86 %box-displaced-alien ( dst displacement base displacement' base' base-class -- )
475 ! If displacement is zero, return the base
479 ! Quickly use displacement' before it's needed for real, as allot temporary
480 dst 4 cells alien displacement' %allot
481 ! If base is already a displaced alien, unpack it
483 displacement' displacement MOV
484 base \ f tag-number CMP
486 base header-offset [+] alien type-number tag-fixnum CMP
488 ! displacement += base.displacement
489 displacement' base 3 alien@ ADD
491 base' base 1 alien@ MOV
493 dst 1 alien@ base' MOV ! alien
494 dst 2 alien@ \ f tag-number MOV ! expired
495 dst 3 alien@ displacement' MOV ! displacement
499 ! The 'small-reg' mess is pretty crappy, but it's only used on x86-32.
500 ! On x86-64, all registers have 8-bit versions. However, a similar
501 ! problem arises for shifts, where the shift count must be in CL, and
502 ! so one day I will fix this properly by adding precoloring to the
503 ! register allocator.
! Per-CPU hook: whether reg has a sub-register of the given bit size.
505 HOOK: has-small-reg? cpu ( reg size -- ? )
! Registers consulted by the x86.32 has-small-reg? method for size 8 and
! searched by small-reg-that-isn't.
507 CONSTANT: have-byte-regs { EAX ECX EDX EBX }
509 M: x86.32 has-small-reg?
511 { 8 [ have-byte-regs memq? ] }
! On x86.64 the answer is always t, whatever the register and size.
516 M: x86.64 has-small-reg? 2drop t ;
! Pick the first register in have-byte-regs that is not in exclude.
! Each element of exclude is first mapped through native-version-of, and
! membership is tested with memq?.
! NOTE(review): if every have-byte-regs entry is excluded, find yields
! no element and this presumably returns f -- confirm callers never
! exclude all four.
518 : small-reg-that-isn't ( exclude -- reg' )
519 [ have-byte-regs ] dip
520 [ native-version-of ] map
521 '[ _ memq? not ] find nip ;
! Emit PUSH reg, call quot with reg on the stack, then emit POP reg.
523 : with-save/restore ( reg quot -- )
524 [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline
526 :: with-small-register ( dst exclude size quot: ( new-dst -- ) -- )
527 ! If the destination register overlaps a small register with
528 ! 'size' bits, we call the quot with that. Otherwise, we find a
529 ! small register that is not in exclude, and call quot, saving and
530 ! restoring the small register.
531 dst size has-small-reg? [ dst quot call ] [
532 exclude small-reg-that-isn't
533 [ quot call ] with-save/restore
! Emit MOV dst, src unless dst and src are equal (=), in which case
! nothing is emitted.
536 : ?MOV ( dst src -- )
537 2dup = [ 2drop ] [ MOV ] if ; inline
539 M:: x86 %string-nth ( dst src index temp -- )
540 ! We request a small-reg of size 8 since those of size 16 are
543 dst { src index temp } 8 [| new-dst |
544 ! Load the least significant 7 bits into new-dst.
545 ! 8th bit indicates whether we have to load from
546 ! the aux vector or not.
547 temp src index [+] LEA
548 new-dst 8-bit-version-of temp string-offset [+] MOV
549 new-dst new-dst 8-bit-version-of MOVZX
550 ! Do we have to look at the aux vector?
553 ! Yes, this is a non-ASCII character. Load aux vector
554 temp src string-aux-offset [+] MOV
560 new-dst 16-bit-version-of new-dst byte-array-offset [+] MOV
561 new-dst new-dst 16-bit-version-of MOVZX
567 ] with-small-register ;
569 M:: x86 %set-string-nth-fast ( ch str index temp -- )
570 ch { index str temp } 8 [| new-ch |
572 temp str index [+] LEA
573 temp string-offset [+] new-ch 8-bit-version-of MOV
574 ] with-small-register ;
576 :: %alien-integer-getter ( dst src size quot -- )
577 dst { src } size [| new-dst |
578 new-dst dup size n-bit-version-of dup src [] MOV
581 ] with-small-register ; inline
! Unsigned integer load: %alien-integer-getter with MOVZX as the
! extension quotation.
583 : %alien-unsigned-getter ( dst src size -- )
584 [ MOVZX ] %alien-integer-getter ; inline
586 M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ;
587 M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ;
! The 32-bit unsigned load passes a quotation that just drops its two
! inputs, so no extension instruction is emitted.
588 M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ;
! Signed integer load: %alien-integer-getter with MOVSX as the extension
! quotation, for sizes 8, 16 and 32.
590 : %alien-signed-getter ( dst src size -- )
591 [ MOVSX ] %alien-integer-getter ; inline
593 M: x86 %alien-signed-1 8 %alien-signed-getter ;
594 M: x86 %alien-signed-2 16 %alien-signed-getter ;
595 M: x86 %alien-signed-4 32 %alien-signed-getter ;
! Loads through a pointer register: the top input is wrapped with [] to
! form the memory source operand.
597 M: x86 %alien-cell [] MOV ;
598 M: x86 %alien-float [] MOVSS ;
599 M: x86 %alien-double [] MOVSD ;
! The vector load carries a rep on top, so [] is applied underneath it
! and the move is chosen by copy-register.
600 M: x86 %alien-vector [ [] ] dip copy-register ;
602 :: %alien-integer-setter ( ptr value size -- )
603 value { ptr } size [| new-value |
605 ptr [] new-value size n-bit-version-of MOV
606 ] with-small-register ; inline
! Integer stores of 8, 16 and 32 bits via %alien-integer-setter.
608 M: x86 %set-alien-integer-1 8 %alien-integer-setter ;
609 M: x86 %set-alien-integer-2 16 %alien-integer-setter ;
610 M: x86 %set-alien-integer-4 32 %alien-integer-setter ;
! Stores through a pointer register: [] is applied to the input beneath
! the value, making it the memory destination operand.
611 M: x86 %set-alien-cell [ [] ] dip MOV ;
612 M: x86 %set-alien-float [ [] ] dip MOVSS ;
613 M: x86 %set-alien-double [ [] ] dip MOVSD ;
! The vector store has a rep on top as well, hence 2dip; the move is
! chosen by copy-register.
614 M: x86 %set-alien-vector [ [] ] 2dip copy-register ;
! True when reg is ECX or RCX.
616 : shift-count? ( reg -- ? ) { ECX RCX } memq? ;
618 :: emit-shift ( dst src1 src2 quot -- )
627 ECX native-version-of [
629 drop dst CL quot call
! Variable-count shifts: each method hands emit-shift a quotation that
! emits the shift instruction.
634 M: x86 %shl [ SHL ] emit-shift ;
635 M: x86 %shr [ SHR ] emit-shift ;
636 M: x86 %sar [ SAR ] emit-shift ;
! Emits MOV reg, 0 as a placeholder and records an rc-absolute-cell
! rel-dlsym relocation against the symbol "nursery" (library f).
638 : load-zone-ptr ( reg -- )
639 #! Load pointer to start of zone array
640 0 MOV "nursery" f rc-absolute-cell rel-dlsym ;
: load-allot-ptr ( nursery-ptr allot-ptr -- )
    #! Point nursery-ptr at the zone with load-zone-ptr, then emit
    #! MOV allot-ptr, [nursery-ptr + cell].
    over load-zone-ptr
    swap cell [+] MOV ;
: inc-allot-ptr ( nursery-ptr n -- )
    #! Emit ADD [nursery-ptr + cell], n', where n' is n rounded up to a
    #! multiple of 8.
    8 align [ cell [+] ] dip ADD ;
: store-header ( temp class -- )
    #! Emit MOV [temp], <header>, where the header is the class's
    #! type-number encoded with tag-fixnum.
    type-number tag-fixnum [ [] ] dip MOV ;
651 : store-tagged ( dst tag -- )
! Inline allocation of size bytes of the given class. The steps, in
! emission order:
!   1. load the current allocation pointer into dst (nursery-ptr is
!      used as the scratch register holding the zone pointer),
!   2. write the object header at [dst],
!   3. tag dst for the class,
!   4. advance the allocation pointer by size (rounded up to 8 by
!      inc-allot-ptr).
! NOTE(review): no space check is performed here; presumably
! %check-nursery is emitted beforehand -- confirm.
654 M:: x86 %allot ( dst size class nursery-ptr -- )
655 nursery-ptr dst load-allot-ptr
656 dst class store-header
657 dst class store-tagged
658 nursery-ptr size inc-allot-ptr ;
660 M:: x86 %write-barrier ( src card# table -- )
661 #! Mark the card pointed to by vreg.
665 table "cards_offset" f %alien-global
667 table card# [+] card-mark <byte> MOV
670 card# deck-bits card-bits - SHR
671 table "decks_offset" f %alien-global
673 table card# [+] card-mark <byte> MOV ;
675 M:: x86 %check-nursery ( label temp1 temp2 -- )
677 temp2 temp1 cell [+] MOV
679 temp1 temp1 3 cells [+] MOV
! Store a register into a GC root slot: MOV <gc-root@ operand>, register.
683 M: x86 %save-gc-root ( gc-root register -- ) [ gc-root@ ] dip MOV ;
! Load a register from a GC root slot: MOV register, <gc-root@ operand>.
685 M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ;
687 M:: x86 %call-gc ( gc-root-count -- )
688 ! Pass pointer to start of GC roots as first parameter
689 param-reg-1 gc-root-base param@ LEA
690 ! Pass number of roots as second parameter
691 param-reg-2 gc-root-count MOV
693 "inline_gc" f %alien-invoke ;
696 [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ;
! Tear down the frame: add n minus one cell to the stack register
! (incr-stack-reg emits nothing when that difference is 0).
698 M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
! Materialize a boolean in dst. dst is first set to the tag number of f,
! temp is loaded with a placeholder relocated to the t object, and then
! word is executed on ( dst temp ). The callers visible in this file
! pass a conditional-move word, so dst becomes t only when the condition
! holds.
700 :: %boolean ( dst temp word -- )
701 dst \ f tag-number MOV
702 temp 0 MOV \ t rc-absolute-cell rel-immediate
703 dst temp word execute ; inline
705 M:: x86 %compare ( dst src1 src2 cc temp -- )
708 { cc< [ dst temp \ CMOVL %boolean ] }
709 { cc<= [ dst temp \ CMOVLE %boolean ] }
710 { cc> [ dst temp \ CMOVG %boolean ] }
711 { cc>= [ dst temp \ CMOVGE %boolean ] }
712 { cc= [ dst temp \ CMOVE %boolean ] }
713 { cc/= [ dst temp \ CMOVNE %boolean ] }
716 M: x86 %compare-imm ( dst src1 src2 cc temp -- )
719 : %cmov-float= ( dst src -- )
721 "no-move" define-label
723 "no-move" get [ JNE ] [ JP ] bi
725 "no-move" resolve-label
728 : %cmov-float/= ( dst src -- )
730 "no-move" define-label
737 "no-move" resolve-label
740 :: (%compare-float) ( dst src1 src2 cc temp compare -- )
742 { cc< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] }
743 { cc<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
744 { cc> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] }
745 { cc>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
746 { cc= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= %boolean ] }
747 { cc<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE %boolean ] }
748 { cc<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP %boolean ] }
749 { cc/< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
750 { cc/<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] }
751 { cc/> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
752 { cc/>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] }
753 { cc/= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= %boolean ] }
754 { cc/<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE %boolean ] }
755 { cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP %boolean ] }
! Float comparison producing a boolean. The two variants differ only in
! the compare instruction word handed to (%compare-float).
758 M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
759 \ COMISD (%compare-float) ;
761 M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- )
762 \ UCOMISD (%compare-float) ;
764 M:: x86 %compare-branch ( label src1 src2 cc -- )
768 { cc<= [ label JLE ] }
770 { cc>= [ label JGE ] }
772 { cc/= [ label JNE ] }
775 M: x86 %compare-imm-branch ( label src1 src2 cc -- )
778 : %jump-float= ( label -- )
780 "no-jump" define-label
783 "no-jump" resolve-label
786 : %jump-float/= ( label -- )
789 :: (%compare-float-branch) ( label src1 src2 cc compare -- )
791 { cc< [ src2 src1 \ compare execute( a b -- ) label JA ] }
792 { cc<= [ src2 src1 \ compare execute( a b -- ) label JAE ] }
793 { cc> [ src1 src2 \ compare execute( a b -- ) label JA ] }
794 { cc>= [ src1 src2 \ compare execute( a b -- ) label JAE ] }
795 { cc= [ src1 src2 \ compare execute( a b -- ) label %jump-float= ] }
796 { cc<> [ src1 src2 \ compare execute( a b -- ) label JNE ] }
797 { cc<>= [ src1 src2 \ compare execute( a b -- ) label JNP ] }
798 { cc/< [ src2 src1 \ compare execute( a b -- ) label JBE ] }
799 { cc/<= [ src2 src1 \ compare execute( a b -- ) label JB ] }
800 { cc/> [ src1 src2 \ compare execute( a b -- ) label JBE ] }
801 { cc/>= [ src1 src2 \ compare execute( a b -- ) label JB ] }
802 { cc/= [ src1 src2 \ compare execute( a b -- ) label %jump-float/= ] }
803 { cc/<> [ src1 src2 \ compare execute( a b -- ) label JE ] }
804 { cc/<>= [ src1 src2 \ compare execute( a b -- ) label JP ] }
! Float comparison followed by a branch to label. The two variants
! differ only in the compare instruction word handed to
! (%compare-float-branch).
807 M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
808 \ COMISD (%compare-float-branch) ;
810 M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
811 \ UCOMISD (%compare-float-branch) ;
! Spill: copy src into spill slot n, using the move for rep.
813 M:: x86 %spill ( src rep n -- )
814 n spill@ src rep copy-register ;
! Reload: copy spill slot n into dst, using the move for rep.
816 M:: x86 %reload ( dst rep n -- )
817 dst n spill@ rep copy-register ;
! Pad with NOP instructions, one per byte reported by 16 code-alignment,
! so that the code following starts on a multiple of 16.
819 M: x86 %loop-entry 16 code-alignment [ NOP ] times ;
821 M:: x86 %save-context ( temp1 temp2 callback-allowed? -- )
822 #! Save Factor stack pointers in case the C code calls a
823 #! callback which does a GC, which must reliably trace
825 temp1 "stack_chain" f %alien-global
827 temp2 stack-reg cell neg [+] LEA
830 temp1 2 cells [+] ds-reg MOV
831 temp1 3 cells [+] rs-reg MOV
! Answers t for every input.
834 M: x86 value-struct? drop t ;
! True when n lies in the inclusive range -80000000..7fffffff (hex),
! i.e. it fits in a signed 32-bit value.
836 M: x86 small-enough? ( n -- ? )
837 HEX: -80000000 HEX: 7fffffff between? ;
! The operand is stack@ at offset n plus the total-size of the current
! stack-frame variable.
839 : next-stack@ ( n -- operand )
840 #! nth parameter from the next stack frame. Used to box
841 #! input values to callbacks; the callback has its own
842 #! stack frame set up, and we want to read the frame
843 #! set up by the caller.
844 stack-frame get total-size>> + stack@ ;
847 enable-float-intrinsics
858 :: install-sse-check ( version -- )
860 sse-version version < [
861 "This image was built to use " write
862 version sse-string write
863 " but your CPU only supports " write
864 sse-version sse-string write "." print
865 "You will need to bootstrap Factor again." print
869 ] "cpu.x86" add-init-hook ;
871 : enable-sse ( version -- )
875 { 20 [ enable-sse2 ] }
876 { 30 [ enable-sse3 ] }
877 { 33 [ enable-sse3 ] }
878 { 41 [ enable-sse3 ] }
879 { 42 [ enable-sse3 ] }
882 [ { sse_version } compile ] with-optimizer
884 "Checking for multimedia extensions: " write sse-version 30 min
885 [ sse-string write " detected" print ]
886 [ install-sse-check ]