basis/cpu/x86/x86.factor

   1 ! Copyright (C) 2005, 2009 Slava Pestov.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 <<<<<<< HEAD
   4 USING: accessors assocs alien alien.c-types arrays strings
   5 cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
   6 cpu.architecture kernel kernel.private math memory namespaces make
   7 sequences words system layouts combinators math.order fry locals
   8 compiler.constants byte-arrays
   9 compiler.cfg.registers
  10 compiler.cfg.instructions
  11 compiler.cfg.intrinsics
  12 compiler.cfg.comparisons
  13 compiler.cfg.stack-frame
  14 compiler.codegen
  15 compiler.codegen.fixup ;
  16 =======
  17 USING: accessors alien combinators compiler.cfg.comparisons
  18 compiler.cfg.intrinsics compiler.cfg.registers
  19 compiler.cfg.stack-frame compiler.codegen.fixup compiler.constants
  20 cpu.architecture cpu.x86.assembler cpu.x86.assembler.operands fry
  21 kernel layouts locals make math math.order namespaces sequences system
  22 vm ;
  23 >>>>>>> Added a vm C-STRUCT, using it for struct offsets in x86 asm
  24 IN: cpu.x86
  25
  26 << enable-fixnum-log2 >>
  27
  28 ! Add some methods to the assembler to be more useful to the backend
  29 M: label JMP 0 JMP rc-relative label-fixup ;
  30 M: label JUMPcc [ 0 ] dip JUMPcc rc-relative label-fixup ;
  31
  32 M: x86 two-operand? t ;
  33
  34 HOOK: stack-reg cpu ( -- reg )
  35
  36 HOOK: reserved-area-size cpu ( -- n )
  37
  38 : stack@ ( n -- op ) stack-reg swap [+] ;
  39
  40 : param@ ( n -- op ) reserved-area-size + stack@ ;
  41
  42 : spill@ ( n -- op ) spill-offset param@ ;
  43
  44 : gc-root@ ( n -- op ) gc-root-offset param@ ;
  45
  46 : decr-stack-reg ( n -- )
  47     dup 0 = [ drop ] [ stack-reg swap SUB ] if ;
  48
  49 : incr-stack-reg ( n -- )
  50     dup 0 = [ drop ] [ stack-reg swap ADD ] if ;
  51
  52 : align-stack ( n -- n' )
  53     os macosx? cpu x86.64? or [ 16 align ] when ;
  54
  55 M: x86 stack-frame-size ( stack-frame -- i )
  56     (stack-frame-size) 3 cells reserved-area-size + + align-stack ;
  57
  58 ! Must be a volatile register not used for parameter passing, for safe
  59 ! use in calls in and out of C
  60 HOOK: temp-reg cpu ( -- reg )
  61
  62 ! Fastcall calling convention
  63 HOOK: param-reg-1 cpu ( -- reg )
  64 HOOK: param-reg-2 cpu ( -- reg )
  65
  66 HOOK: pic-tail-reg cpu ( -- reg )
  67
  68 M: x86 %load-immediate dup 0 = [ drop dup XOR ] [ MOV ] if ;
  69
  70 M: x86 %load-reference swap 0 MOV rc-absolute-cell rel-immediate ;
  71
  72 HOOK: ds-reg cpu ( -- reg )
  73 HOOK: rs-reg cpu ( -- reg )
  74
  75 : reg-stack ( n reg -- op ) swap cells neg [+] ;
  76
  77 GENERIC: loc>operand ( loc -- operand )
  78
  79 M: ds-loc loc>operand n>> ds-reg reg-stack ;
  80 M: rs-loc loc>operand n>> rs-reg reg-stack ;
  81
  82 M: x86 %peek loc>operand MOV ;
  83 M: x86 %replace loc>operand swap MOV ;
  84 : (%inc) ( n reg -- ) swap cells dup 0 > [ ADD ] [ neg SUB ] if ; inline
  85 M: x86 %inc-d ( n -- ) ds-reg (%inc) ;
  86 M: x86 %inc-r ( n -- ) rs-reg (%inc) ;
  87
  88 M: x86 %call ( word -- ) 0 CALL rc-relative rel-word-pic ;
  89
  90 : xt-tail-pic-offset ( -- n )
  91     #! See the comment in vm/cpu-x86.hpp
  92     cell 4 + 1 + ; inline
  93
  94 M: x86 %jump ( word -- )
  95     pic-tail-reg 0 MOV xt-tail-pic-offset rc-absolute-cell rel-here
  96     0 JMP rc-relative rel-word-pic-tail ;
  97
  98 M: x86 %jump-label ( label -- ) 0 JMP rc-relative label-fixup ;
  99
 100 M: x86 %return ( -- ) 0 RET ;
 101
 102 : code-alignment ( align -- n )
 103     [ building get length dup ] dip align swap - ;
 104
 105 : align-code ( n -- )
 106     0 <repetition> % ;
 107
 108 :: (%slot) ( obj slot tag temp -- op )
 109     temp slot obj [+] LEA
 110     temp tag neg [+] ; inline
 111
 112 :: (%slot-imm) ( obj slot tag -- op )
 113     obj slot cells tag - [+] ; inline
 114
 115 M: x86 %slot ( dst obj slot tag temp -- ) (%slot) MOV ;
 116 M: x86 %slot-imm ( dst obj slot tag -- ) (%slot-imm) MOV ;
 117 M: x86 %set-slot ( src obj slot tag temp -- ) (%slot) swap MOV ;
 118 M: x86 %set-slot-imm ( src obj slot tag -- ) (%slot-imm) swap MOV ;
 119
 120 M: x86 %add     2over eq? [ nip ADD ] [ [+] LEA ] if ;
 121 M: x86 %add-imm 2over eq? [ nip ADD ] [ [+] LEA ] if ;
 122 M: x86 %sub     nip SUB ;
 123 M: x86 %sub-imm 2over eq? [ nip SUB ] [ neg [+] LEA ] if ;
 124 M: x86 %mul     nip swap IMUL2 ;
 125 M: x86 %mul-imm IMUL3 ;
 126 M: x86 %and     nip AND ;
 127 M: x86 %and-imm nip AND ;
 128 M: x86 %or      nip OR ;
 129 M: x86 %or-imm  nip OR ;
 130 M: x86 %xor     nip XOR ;
 131 M: x86 %xor-imm nip XOR ;
 132 M: x86 %shl-imm nip SHL ;
 133 M: x86 %shr-imm nip SHR ;
 134 M: x86 %sar-imm nip SAR ;
 135
 136 M: x86 %min     nip [ CMP ] [ CMOVG ] 2bi ;
 137 M: x86 %max     nip [ CMP ] [ CMOVL ] 2bi ;
 138
 139 M: x86 %not     drop NOT ;
 140 M: x86 %log2    BSR ;
 141
 142 GENERIC: copy-register* ( dst src rep -- )
 143
 144 M: int-rep copy-register* drop MOV ;
 145 M: tagged-rep copy-register* drop MOV ;
 146 M: float-rep copy-register* drop MOVSS ;
 147 M: double-rep copy-register* drop MOVSD ;
 148 M: float-4-rep copy-register* drop MOVUPS ;
 149 M: double-2-rep copy-register* drop MOVUPD ;
 150 M: vector-rep copy-register* drop MOVDQU ;
 151
 152 : copy-register ( dst src rep -- )
 153     2over eq? [ 3drop ] [ copy-register* ] if ;
 154
 155 M: x86 %copy ( dst src rep -- ) copy-register ;
 156
 157 :: overflow-template ( label dst src1 src2 insn -- )
 158     src1 src2 insn call
 159     label JO ; inline
 160
 161 M: x86 %fixnum-add ( label dst src1 src2 -- )
 162     [ ADD ] overflow-template ;
 163
 164 M: x86 %fixnum-sub ( label dst src1 src2 -- )
 165     [ SUB ] overflow-template ;
 166
 167 M: x86 %fixnum-mul ( label dst src1 src2 -- )
 168     [ swap IMUL2 ] overflow-template ;
 169
 170 : bignum@ ( reg n -- op )
 171     cells bignum tag-number - [+] ; inline
 172
 173 M:: x86 %integer>bignum ( dst src temp -- )
 174     #! on entry, inreg is a signed 32-bit quantity
 175     #! exits with tagged ptr to bignum in outreg
 176     #! 1 cell header, 1 cell length, 1 cell sign, + digits
 177     #! length is the # of digits + sign
 178     [
 179         "end" define-label
 180         ! Load cached zero value
 181         dst 0 >bignum %load-reference
 182         src 0 CMP
 183         ! Is it zero? Then just go to the end and return this zero
 184         "end" get JE
 185         ! Allocate a bignum
 186         dst 4 cells bignum temp %allot
 187         ! Write length
 188         dst 1 bignum@ 2 tag-fixnum MOV
 189         ! Store value
 190         dst 3 bignum@ src MOV
 191         ! Compute sign
 192         temp src MOV
 193         temp cell-bits 1 - SAR
 194         temp 1 AND
 195         ! Store sign
 196         dst 2 bignum@ temp MOV
 197         ! Make negative value positive
 198         temp temp ADD
 199         temp NEG
 200         temp 1 ADD
 201         src temp IMUL2
 202         ! Store the bignum
 203         dst 3 bignum@ temp MOV
 204         "end" resolve-label
 205     ] with-scope ;
 206
 207 M:: x86 %bignum>integer ( dst src temp -- )
 208     [
 209         "end" define-label
 210         ! load length
 211         temp src 1 bignum@ MOV
 212         ! if the length is 1, its just the sign and nothing else,
 213         ! so output 0
 214         dst 0 MOV
 215         temp 1 tag-fixnum CMP
 216         "end" get JE
 217         ! load the value
 218         dst src 3 bignum@ MOV
 219         ! load the sign
 220         temp src 2 bignum@ MOV
 221         ! convert it into -1 or 1
 222         temp temp ADD
 223         temp NEG
 224         temp 1 ADD
 225         ! make dst signed
 226         temp dst IMUL2
 227         "end" resolve-label
 228     ] with-scope ;
 229
 230 M: x86 %add-float nip ADDSD ;
 231 M: x86 %sub-float nip SUBSD ;
 232 M: x86 %mul-float nip MULSD ;
 233 M: x86 %div-float nip DIVSD ;
 234 M: x86 %min-float nip MINSD ;
 235 M: x86 %max-float nip MAXSD ;
 236 M: x86 %sqrt SQRTSD ;
 237
 238 M: x86 %single>double-float CVTSS2SD ;
 239 M: x86 %double>single-float CVTSD2SS ;
 240
 241 M: x86 %integer>float CVTSI2SD ;
 242 M: x86 %float>integer CVTTSD2SI ;
 243
 244 M: x86 %unbox-float ( dst src -- )
 245     float-offset [+] MOVSD ;
 246
 247 M:: x86 %box-float ( dst src temp -- )
 248     dst 16 float temp %allot
 249     dst float-offset [+] src MOVSD ;
 250
 251 M:: x86 %box-vector ( dst src rep temp -- )
 252     dst rep rep-size 2 cells + byte-array temp %allot
 253     16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm
 254     dst byte-array-offset [+]
 255     src rep copy-register ;
 256
 257 M:: x86 %unbox-vector ( dst src rep -- )
 258     dst src byte-array-offset [+]
 259     rep copy-register ;
 260
 261 M: x86 %broadcast-vector ( dst src rep -- )
 262     {
 263         { float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
 264         { double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
 265     } case ;
 266
 267 M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
 268     rep {
 269         {
 270             float-4-rep
 271             [
 272                 dst src1 MOVSS
 273                 dst src2 UNPCKLPS
 274                 src3 src4 UNPCKLPS
 275                 dst src3 MOVLHPS
 276             ]
 277         }
 278     } case ;
 279
 280 M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
 281     rep {
 282         {
 283             double-2-rep
 284             [
 285                 dst src1 MOVSD
 286                 dst src2 UNPCKLPD
 287             ]
 288         }
 289     } case ;
 290
 291 M: x86 %add-vector ( dst src1 src2 rep -- )
 292     {
 293         { float-4-rep [ ADDPS ] }
 294         { double-2-rep [ ADDPD ] }
 295         { char-16-rep [ PADDB ] }
 296         { uchar-16-rep [ PADDB ] }
 297         { short-8-rep [ PADDW ] }
 298         { ushort-8-rep [ PADDW ] }
 299         { int-4-rep [ PADDD ] }
 300         { uint-4-rep [ PADDD ] }
 301     } case drop ;
 302
 303 M: x86 %sub-vector ( dst src1 src2 rep -- )
 304     {
 305         { float-4-rep [ SUBPS ] }
 306         { double-2-rep [ SUBPD ] }
 307         { char-16-rep [ PSUBB ] }
 308         { uchar-16-rep [ PSUBB ] }
 309         { short-8-rep [ PSUBW ] }
 310         { ushort-8-rep [ PSUBW ] }
 311         { int-4-rep [ PSUBD ] }
 312         { uint-4-rep [ PSUBD ] }
 313     } case drop ;
 314
 315 M: x86 %mul-vector ( dst src1 src2 rep -- )
 316     {
 317         { float-4-rep [ MULPS ] }
 318         { double-2-rep [ MULPD ] }
 319         { int-4-rep [ PMULLW ] }
 320     } case drop ;
 321
 322 M: x86 %div-vector ( dst src1 src2 rep -- )
 323     {
 324         { float-4-rep [ DIVPS ] }
 325         { double-2-rep [ DIVPD ] }
 326     } case drop ;
 327
 328 M: x86 %min-vector ( dst src1 src2 rep -- )
 329     {
 330         { float-4-rep [ MINPS ] }
 331         { double-2-rep [ MINPD ] }
 332     } case drop ;
 333
 334 M: x86 %max-vector ( dst src1 src2 rep -- )
 335     {
 336         { float-4-rep [ MAXPS ] }
 337         { double-2-rep [ MAXPD ] }
 338     } case drop ;
 339
 340 M: x86 %sqrt-vector ( dst src rep -- )
 341     {
 342         { float-4-rep [ SQRTPS ] }
 343         { double-2-rep [ SQRTPD ] }
 344     } case ;
 345
 346 M: x86 %horizontal-add-vector ( dst src rep -- )
 347     {
 348         { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
 349         { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
 350     } case ;
 351
 352 M: x86 %unbox-alien ( dst src -- )
 353     alien-offset [+] MOV ;
 354
 355 M:: x86 %unbox-any-c-ptr ( dst src temp -- )
 356     [
 357         { "is-byte-array" "end" "start" } [ define-label ] each
 358         dst 0 MOV
 359         temp src MOV
 360         ! We come back here with displaced aliens
 361         "start" resolve-label
 362         ! Is the object f?
 363         temp \ f tag-number CMP
 364         "end" get JE
 365         ! Is the object an alien?
 366         temp header-offset [+] alien type-number tag-fixnum CMP
 367         "is-byte-array" get JNE
 368         ! If so, load the offset and add it to the address
 369         dst temp alien-offset [+] ADD
 370         ! Now recurse on the underlying alien
 371         temp temp underlying-alien-offset [+] MOV
 372         "start" get JMP
 373         "is-byte-array" resolve-label
 374         ! Add byte array address to address being computed
 375         dst temp ADD
 376         ! Add an offset to start of byte array's data
 377         dst byte-array-offset ADD
 378         "end" resolve-label
 379     ] with-scope ;
 380
 381 : alien@ ( reg n -- op ) cells alien tag-number - [+] ;
 382
 383 :: %allot-alien ( dst displacement base temp -- )
 384     dst 4 cells alien temp %allot
 385     dst 1 alien@ base MOV ! alien
 386     dst 2 alien@ \ f tag-number MOV ! expired
 387     dst 3 alien@ displacement MOV ! displacement
 388     ;
 389
 390 M:: x86 %box-alien ( dst src temp -- )
 391     [
 392         "end" define-label
 393         dst \ f tag-number MOV
 394         src 0 CMP
 395         "end" get JE
 396         dst src \ f tag-number temp %allot-alien
 397         "end" resolve-label
 398     ] with-scope ;
 399
 400 M:: x86 %box-displaced-alien ( dst displacement base displacement' base' base-class -- )
 401     [
 402         "end" define-label
 403         "ok" define-label
 404         ! If displacement is zero, return the base
 405         dst base MOV
 406         displacement 0 CMP
 407         "end" get JE
 408         ! Quickly use displacement' before its needed for real, as allot temporary
 409         dst 4 cells alien displacement' %allot
 410         ! If base is already a displaced alien, unpack it
 411         base' base MOV
 412         displacement' displacement MOV
 413         base \ f tag-number CMP
 414         "ok" get JE
 415         base header-offset [+] alien type-number tag-fixnum CMP
 416         "ok" get JNE
 417         ! displacement += base.displacement
 418         displacement' base 3 alien@ ADD
 419         ! base = base.base
 420         base' base 1 alien@ MOV
 421         "ok" resolve-label
 422         dst 1 alien@ base' MOV ! alien
 423         dst 2 alien@ \ f tag-number MOV ! expired
 424         dst 3 alien@ displacement' MOV ! displacement
 425         "end" resolve-label
 426     ] with-scope ;
 427
 428 ! The 'small-reg' mess is pretty crappy, but its only used on x86-32.
 429 ! On x86-64, all registers have 8-bit versions. However, a similar
 430 ! problem arises for shifts, where the shift count must be in CL, and
 431 ! so one day I will fix this properly by adding precoloring to the
 432 ! register allocator.
 433
 434 HOOK: has-small-reg? cpu ( reg size -- ? )
 435
 436 CONSTANT: have-byte-regs { EAX ECX EDX EBX }
 437
 438 M: x86.32 has-small-reg?
 439     {
 440         { 8 [ have-byte-regs memq? ] }
 441         { 16 [ drop t ] }
 442         { 32 [ drop t ] }
 443     } case ;
 444
 445 M: x86.64 has-small-reg? 2drop t ;
 446
 447 : small-reg-that-isn't ( exclude -- reg' )
 448     [ have-byte-regs ] dip
 449     [ native-version-of ] map
 450     '[ _ memq? not ] find nip ;
 451
 452 : with-save/restore ( reg quot -- )
 453     [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline
 454
 455 :: with-small-register ( dst exclude size quot: ( new-dst -- ) -- )
 456     ! If the destination register overlaps a small register with
 457     ! 'size' bits, we call the quot with that. Otherwise, we find a
 458     ! small register that is not in exclude, and call quot, saving and
 459     ! restoring the small register.
 460     dst size has-small-reg? [ dst quot call ] [
 461         exclude small-reg-that-isn't
 462         [ quot call ] with-save/restore
 463     ] if ; inline
 464
 465 : ?MOV ( dst src -- )
 466     2dup = [ 2drop ] [ MOV ] if ; inline
 467
 468 M:: x86 %string-nth ( dst src index temp -- )
 469     ! We request a small-reg of size 8 since those of size 16 are
 470     ! a superset.
 471     "end" define-label
 472     dst { src index temp } 8 [| new-dst |
 473         ! Load the least significant 7 bits into new-dst.
 474         ! 8th bit indicates whether we have to load from
 475         ! the aux vector or not.
 476         temp src index [+] LEA
 477         new-dst 8-bit-version-of temp string-offset [+] MOV
 478         new-dst new-dst 8-bit-version-of MOVZX
 479         ! Do we have to look at the aux vector?
 480         new-dst HEX: 80 CMP
 481         "end" get JL
 482         ! Yes, this is a non-ASCII character. Load aux vector
 483         temp src string-aux-offset [+] MOV
 484         new-dst temp XCHG
 485         ! Compute index
 486         new-dst index ADD
 487         new-dst index ADD
 488         ! Load high 16 bits
 489         new-dst 16-bit-version-of new-dst byte-array-offset [+] MOV
 490         new-dst new-dst 16-bit-version-of MOVZX
 491         new-dst 7 SHL
 492         ! Compute code point
 493         new-dst temp XOR
 494         "end" resolve-label
 495         dst new-dst ?MOV
 496     ] with-small-register ;
 497
 498 M:: x86 %set-string-nth-fast ( ch str index temp -- )
 499     ch { index str temp } 8 [| new-ch |
 500         new-ch ch ?MOV
 501         temp str index [+] LEA
 502         temp string-offset [+] new-ch 8-bit-version-of MOV
 503     ] with-small-register ;
 504
 505 :: %alien-integer-getter ( dst src size quot -- )
 506     dst { src } size [| new-dst |
 507         new-dst dup size n-bit-version-of dup src [] MOV
 508         quot call
 509         dst new-dst ?MOV
 510     ] with-small-register ; inline
 511
 512 : %alien-unsigned-getter ( dst src size -- )
 513     [ MOVZX ] %alien-integer-getter ; inline
 514
 515 M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ;
 516 M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ;
 517 M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ;
 518
 519 : %alien-signed-getter ( dst src size -- )
 520     [ MOVSX ] %alien-integer-getter ; inline
 521
 522 M: x86 %alien-signed-1 8 %alien-signed-getter ;
 523 M: x86 %alien-signed-2 16 %alien-signed-getter ;
 524 M: x86 %alien-signed-4 32 %alien-signed-getter ;
 525
 526 M: x86 %alien-cell [] MOV ;
 527 M: x86 %alien-float [] MOVSS ;
 528 M: x86 %alien-double [] MOVSD ;
 529 M: x86 %alien-vector [ [] ] dip copy-register ;
 530
 531 :: %alien-integer-setter ( ptr value size -- )
 532     value { ptr } size [| new-value |
 533         new-value value ?MOV
 534         ptr [] new-value size n-bit-version-of MOV
 535     ] with-small-register ; inline
 536
 537 M: x86 %set-alien-integer-1 8 %alien-integer-setter ;
 538 M: x86 %set-alien-integer-2 16 %alien-integer-setter ;
 539 M: x86 %set-alien-integer-4 32 %alien-integer-setter ;
 540 M: x86 %set-alien-cell [ [] ] dip MOV ;
 541 M: x86 %set-alien-float [ [] ] dip MOVSS ;
 542 M: x86 %set-alien-double [ [] ] dip MOVSD ;
 543 M: x86 %set-alien-vector [ [] ] 2dip copy-register ;
 544
 545 : shift-count? ( reg -- ? ) { ECX RCX } memq? ;
 546
 547 :: emit-shift ( dst src1 src2 quot -- )
 548     src2 shift-count? [
 549         dst CL quot call
 550     ] [
 551         dst shift-count? [
 552             dst src2 XCHG
 553             src2 CL quot call
 554             dst src2 XCHG
 555         ] [
 556             ECX native-version-of [
 557                 CL src2 MOV
 558                 drop dst CL quot call
 559             ] with-save/restore
 560         ] if
 561     ] if ; inline
 562
 563 M: x86 %shl [ SHL ] emit-shift ;
 564 M: x86 %shr [ SHR ] emit-shift ;
 565 M: x86 %sar [ SAR ] emit-shift ;
 566
 567 : load-zone-ptr ( reg -- )
 568     #! Load pointer to start of zone array
 569     [ 0 MOV rc-absolute-cell rt-vm rel-fixup ]
 570     [ "nursery" vm-field-offset ADD ] bi ;
 571
 572 : load-allot-ptr ( nursery-ptr allot-ptr -- )
 573     [ drop load-zone-ptr ] [ swap cell [+] MOV ] 2bi ;
 574
 575 : inc-allot-ptr ( nursery-ptr n -- )
 576     [ cell [+] ] dip 8 align ADD ;
 577
 578 : store-header ( temp class -- )
 579     [ [] ] [ type-number tag-fixnum ] bi* MOV ;
 580
 581 : store-tagged ( dst tag -- )
 582     tag-number OR ;
 583
 584 M:: x86 %allot ( dst size class nursery-ptr -- )
 585     nursery-ptr dst load-allot-ptr
 586     dst class store-header
 587     dst class store-tagged
 588     nursery-ptr size inc-allot-ptr ;
 589
 590 : %vm-field-ptr ( reg field -- )
 591     [ drop 0 MOV rc-absolute-cell rt-vm rel-fixup ]
 592     [ vm-field-offset ADD ] 2bi ;
 593
 594 M:: x86 %write-barrier ( src card# table -- )
 595     #! Mark the card pointed to by vreg.
 596     ! Mark the card
 597     card# src MOV
 598     card# card-bits SHR
 599     table "cards_offset" %vm-field-ptr
 600     table table [] MOV
 601     table card# [+] card-mark <byte> MOV
 602
 603     ! Mark the card deck
 604     card# deck-bits card-bits - SHR
 605     table "decks_offset" %vm-field-ptr
 606     table table [] MOV
 607     table card# [+] card-mark <byte> MOV ;
 608
 609 M:: x86 %check-nursery ( label temp1 temp2 -- )
 610     temp1 load-zone-ptr
 611     temp2 temp1 cell [+] MOV
 612     temp2 1024 ADD
 613     temp1 temp1 3 cells [+] MOV
 614     temp2 temp1 CMP
 615     label JLE ;
 616
 617 M: x86 %save-gc-root ( gc-root register -- ) [ gc-root@ ] dip MOV ;
 618
 619 M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ;
 620
 621 M:: x86 %call-gc ( gc-root-count -- )
 622     ! Pass pointer to start of GC roots as first parameter
 623     param-reg-1 gc-root-base param@ LEA
 624     ! Pass number of roots as second parameter
 625     param-reg-2 gc-root-count MOV
 626     ! Call GC
 627     "inline_gc" f %vm-invoke ;
 628
 629 M: x86 %alien-global ( dst symbol library -- )
 630     [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ;
 631
 632 M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
 633
 634 :: %boolean ( dst temp word -- )
 635     dst \ f tag-number MOV
 636     temp 0 MOV \ t rc-absolute-cell rel-immediate
 637     dst temp word execute ; inline
 638
 639 M:: x86 %compare ( dst src1 src2 cc temp -- )
 640     src1 src2 CMP
 641     cc order-cc {
 642         { cc<  [ dst temp \ CMOVL %boolean ] }
 643         { cc<= [ dst temp \ CMOVLE %boolean ] }
 644         { cc>  [ dst temp \ CMOVG %boolean ] }
 645         { cc>= [ dst temp \ CMOVGE %boolean ] }
 646         { cc=  [ dst temp \ CMOVE %boolean ] }
 647         { cc/= [ dst temp \ CMOVNE %boolean ] }
 648     } case ;
 649
 650 M: x86 %compare-imm ( dst src1 src2 cc temp -- )
 651     %compare ;
 652
 653 : %cmov-float= ( dst src -- )
 654     [
 655         "no-move" define-label
 656
 657         "no-move" get [ JNE ] [ JP ] bi
 658         MOV
 659         "no-move" resolve-label
 660     ] with-scope ;
 661
 662 : %cmov-float/= ( dst src -- )
 663     [
 664         "no-move" define-label
 665         "move" define-label
 666
 667         "move" get JP
 668         "no-move" get JE
 669         "move" resolve-label
 670         MOV
 671         "no-move" resolve-label
 672     ] with-scope ;
 673
 674 :: (%compare-float) ( dst src1 src2 cc temp compare -- )
 675     cc {
 676         { cc<    [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA  %boolean ] }
 677         { cc<=   [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
 678         { cc>    [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA  %boolean ] }
 679         { cc>=   [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
 680         { cc=    [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= %boolean ] }
 681         { cc<>   [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE %boolean ] }
 682         { cc<>=  [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP %boolean ] }
 683         { cc/<   [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
 684         { cc/<=  [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB  %boolean ] }
 685         { cc/>   [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
 686         { cc/>=  [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB  %boolean ] }
 687         { cc/=   [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= %boolean ] }
 688         { cc/<>  [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE  %boolean ] }
 689         { cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP  %boolean ] }
 690     } case ; inline
 691
 692 M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
 693     \ COMISD (%compare-float) ;
 694
 695 M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- )
 696     \ UCOMISD (%compare-float) ;
 697
 698 M:: x86 %compare-branch ( label src1 src2 cc -- )
 699     src1 src2 CMP
 700     cc order-cc {
 701         { cc<  [ label JL ] }
 702         { cc<= [ label JLE ] }
 703         { cc>  [ label JG ] }
 704         { cc>= [ label JGE ] }
 705         { cc=  [ label JE ] }
 706         { cc/= [ label JNE ] }
 707     } case ;
 708
 709 M: x86 %compare-imm-branch ( label src1 src2 cc -- )
 710     %compare-branch ;
 711
 712 : %jump-float= ( label -- )
 713     [
 714         "no-jump" define-label
 715         "no-jump" get JP
 716         JE
 717         "no-jump" resolve-label
 718     ] with-scope ;
 719
 720 : %jump-float/= ( label -- )
 721     [ JNE ] [ JP ] bi ;
 722
 723 :: (%compare-float-branch) ( label src1 src2 cc compare -- )
 724     cc {
 725         { cc<    [ src2 src1 \ compare execute( a b -- ) label JA  ] }
 726         { cc<=   [ src2 src1 \ compare execute( a b -- ) label JAE ] }
 727         { cc>    [ src1 src2 \ compare execute( a b -- ) label JA  ] }
 728         { cc>=   [ src1 src2 \ compare execute( a b -- ) label JAE ] }
 729         { cc=    [ src1 src2 \ compare execute( a b -- ) label %jump-float= ] }
 730         { cc<>   [ src1 src2 \ compare execute( a b -- ) label JNE ] }
 731         { cc<>=  [ src1 src2 \ compare execute( a b -- ) label JNP ] }
 732         { cc/<   [ src2 src1 \ compare execute( a b -- ) label JBE ] }
 733         { cc/<=  [ src2 src1 \ compare execute( a b -- ) label JB  ] }
 734         { cc/>   [ src1 src2 \ compare execute( a b -- ) label JBE ] }
 735         { cc/>=  [ src1 src2 \ compare execute( a b -- ) label JB  ] }
 736         { cc/=   [ src1 src2 \ compare execute( a b -- ) label %jump-float/= ] }
 737         { cc/<>  [ src1 src2 \ compare execute( a b -- ) label JE  ] }
 738         { cc/<>= [ src1 src2 \ compare execute( a b -- ) label JP  ] }
 739     } case ;
 740
 741 M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
 742     \ COMISD (%compare-float-branch) ;
 743
 744 M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
 745     \ UCOMISD (%compare-float-branch) ;
 746
 747 M:: x86 %spill ( src rep n -- )
 748     n spill@ src rep copy-register ;
 749
 750 M:: x86 %reload ( dst rep n -- )
 751     dst n spill@ rep copy-register ;
 752
 753 M: x86 %loop-entry 16 code-alignment [ NOP ] times ;
 754
 755 M:: x86 %save-context ( temp1 temp2 callback-allowed? -- )
 756     #! Save Factor stack pointers in case the C code calls a
 757     #! callback which does a GC, which must reliably trace
 758     #! all roots.
 759     temp1 0 MOV rc-absolute-cell rt-vm rel-fixup
 760     temp1 temp1 "stack_chain" vm-field-offset [+] MOV
 761     temp2 stack-reg cell neg [+] LEA
 762     temp1 [] temp2 MOV
 763     callback-allowed? [
 764         temp1 2 cells [+] ds-reg MOV
 765         temp1 3 cells [+] rs-reg MOV
 766     ] when ;
 767
 768 M: x86 value-struct? drop t ;
 769
 770 M: x86 small-enough? ( n -- ? )
 771     HEX: -80000000 HEX: 7fffffff between? ;
 772
 773 : next-stack@ ( n -- operand )
 774     #! nth parameter from the next stack frame. Used to box
 775     #! input values to callbacks; the callback has its own
 776     #! stack frame set up, and we want to read the frame
 777     #! set up by the caller.
 778     stack-frame get total-size>> + stack@ ;
 779
 780 : enable-sse2 ( -- )
 781     enable-float-intrinsics
 782     enable-fsqrt
 783     enable-float-min/max
 784     enable-sse2-simd ;
 785
 786 : enable-sse3 ( -- )
 787     enable-sse2
 788     enable-sse3-simd ;
 789
 790 enable-min/max