basis/cpu/x86/x86.factor

   1 ! Copyright (C) 2005, 2009 Slava Pestov.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: accessors assocs alien alien.c-types arrays strings
   4 cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
   5 cpu.architecture kernel kernel.private math memory namespaces make
   6 sequences words system layouts combinators math.order fry locals
   7 compiler.constants vm byte-arrays
   8 compiler.cfg.registers
   9 compiler.cfg.instructions
  10 compiler.cfg.intrinsics
  11 compiler.cfg.comparisons
  12 compiler.cfg.stack-frame
  13 compiler.codegen.fixup ;
  14 FROM: layouts => cell ;
  15 FROM: math => float ;
  16 IN: cpu.x86
  17
  18 ! Add some methods to the assembler to be more useful to the backend
  19 M: label JMP 0 JMP rc-relative label-fixup ;
  20 M: label JUMPcc [ 0 ] dip JUMPcc rc-relative label-fixup ;
  21
  22 M: x86 two-operand? t ;
  23
  24 HOOK: stack-reg cpu ( -- reg )
  25
  26 HOOK: reserved-area-size cpu ( -- n )
  27
  28 : stack@ ( n -- op ) stack-reg swap [+] ;
  29
  30 : param@ ( n -- op ) reserved-area-size + stack@ ;
  31
  32 : spill@ ( n -- op ) spill-offset param@ ;
  33
  34 : gc-root@ ( n -- op ) gc-root-offset param@ ;
  35
  36 : decr-stack-reg ( n -- )
  37     dup 0 = [ drop ] [ stack-reg swap SUB ] if ;
  38
  39 : incr-stack-reg ( n -- )
  40     dup 0 = [ drop ] [ stack-reg swap ADD ] if ;
  41
  42 : align-stack ( n -- n' )
  43     os macosx? cpu x86.64? or [ 16 align ] when ;
  44
  45 M: x86 stack-frame-size ( stack-frame -- i )
  46     (stack-frame-size) 3 cells reserved-area-size + + align-stack ;
  47
  48 ! Must be a volatile register not used for parameter passing, for safe
  49 ! use in calls in and out of C
  50 HOOK: temp-reg cpu ( -- reg )
  51
  52 ! Fastcall calling convention
  53 HOOK: param-reg-1 cpu ( -- reg )
  54 HOOK: param-reg-2 cpu ( -- reg )
  55
  56 HOOK: pic-tail-reg cpu ( -- reg )
  57
  58 M: x86 %load-immediate dup 0 = [ drop dup XOR ] [ MOV ] if ;
  59
  60 M: x86 %load-reference swap 0 MOV rc-absolute-cell rel-immediate ;
  61
  62 HOOK: ds-reg cpu ( -- reg )
  63 HOOK: rs-reg cpu ( -- reg )
  64
  65 : reg-stack ( n reg -- op ) swap cells neg [+] ;
  66
  67 GENERIC: loc>operand ( loc -- operand )
  68
  69 M: ds-loc loc>operand n>> ds-reg reg-stack ;
  70 M: rs-loc loc>operand n>> rs-reg reg-stack ;
  71
  72 M: x86 %peek loc>operand MOV ;
  73 M: x86 %replace loc>operand swap MOV ;
  74 : (%inc) ( n reg -- ) swap cells dup 0 > [ ADD ] [ neg SUB ] if ; inline
  75 M: x86 %inc-d ( n -- ) ds-reg (%inc) ;
  76 M: x86 %inc-r ( n -- ) rs-reg (%inc) ;
  77
  78 M: x86 %call ( word -- ) 0 CALL rc-relative rel-word-pic ;
  79
  80 : xt-tail-pic-offset ( -- n )
  81     #! See the comment in vm/cpu-x86.hpp
  82     cell 4 + 1 + ; inline
  83
  84 M: x86 %jump ( word -- )
  85     pic-tail-reg 0 MOV xt-tail-pic-offset rc-absolute-cell rel-here
  86     0 JMP rc-relative rel-word-pic-tail ;
  87
  88 M: x86 %jump-label ( label -- ) 0 JMP rc-relative label-fixup ;
  89
  90 M: x86 %return ( -- ) 0 RET ;
  91
  92 : code-alignment ( align -- n )
  93     [ building get length dup ] dip align swap - ;
  94
  95 : align-code ( n -- )
  96     0 <repetition> % ;
  97
  98 :: (%slot) ( obj slot tag temp -- op )
  99     temp slot obj [+] LEA
 100     temp tag neg [+] ; inline
 101
 102 :: (%slot-imm) ( obj slot tag -- op )
 103     obj slot cells tag - [+] ; inline
 104
 105 M: x86 %slot ( dst obj slot tag temp -- ) (%slot) MOV ;
 106 M: x86 %slot-imm ( dst obj slot tag -- ) (%slot-imm) MOV ;
 107 M: x86 %set-slot ( src obj slot tag temp -- ) (%slot) swap MOV ;
 108 M: x86 %set-slot-imm ( src obj slot tag -- ) (%slot-imm) swap MOV ;
 109
 110 M: x86 %add     2over eq? [ nip ADD ] [ [+] LEA ] if ;
 111 M: x86 %add-imm 2over eq? [ nip ADD ] [ [+] LEA ] if ;
 112 M: x86 %sub     nip SUB ;
 113 M: x86 %sub-imm 2over eq? [ nip SUB ] [ neg [+] LEA ] if ;
 114 M: x86 %mul     nip swap IMUL2 ;
 115 M: x86 %mul-imm IMUL3 ;
 116 M: x86 %and     nip AND ;
 117 M: x86 %and-imm nip AND ;
 118 M: x86 %or      nip OR ;
 119 M: x86 %or-imm  nip OR ;
 120 M: x86 %xor     nip XOR ;
 121 M: x86 %xor-imm nip XOR ;
 122 M: x86 %shl-imm nip SHL ;
 123 M: x86 %shr-imm nip SHR ;
 124 M: x86 %sar-imm nip SAR ;
 125
 126 M: x86 %min     nip [ CMP ] [ CMOVG ] 2bi ;
 127 M: x86 %max     nip [ CMP ] [ CMOVL ] 2bi ;
 128
 129 M: x86 %not     drop NOT ;
 130 M: x86 %log2    BSR ;
 131
 132 GENERIC: copy-register* ( dst src rep -- )
 133
 134 M: int-rep copy-register* drop MOV ;
 135 M: tagged-rep copy-register* drop MOV ;
 136 M: float-rep copy-register* drop MOVSS ;
 137 M: double-rep copy-register* drop MOVSD ;
 138 M: float-4-rep copy-register* drop MOVUPS ;
 139 M: double-2-rep copy-register* drop MOVUPD ;
 140 M: vector-rep copy-register* drop MOVDQU ;
 141
 142 : copy-register ( dst src rep -- )
 143     2over eq? [ 3drop ] [ copy-register* ] if ;
 144
 145 M: x86 %copy ( dst src rep -- ) copy-register ;
 146
 147 :: overflow-template ( label dst src1 src2 insn -- )
 148     src1 src2 insn call
 149     label JO ; inline
 150
 151 M: x86 %fixnum-add ( label dst src1 src2 -- )
 152     [ ADD ] overflow-template ;
 153
 154 M: x86 %fixnum-sub ( label dst src1 src2 -- )
 155     [ SUB ] overflow-template ;
 156
 157 M: x86 %fixnum-mul ( label dst src1 src2 -- )
 158     [ swap IMUL2 ] overflow-template ;
 159
 160 : bignum@ ( reg n -- op )
 161     cells bignum tag-number - [+] ; inline
 162
 163 M:: x86 %integer>bignum ( dst src temp -- )
 164     #! on entry, inreg is a signed 32-bit quantity
 165     #! exits with tagged ptr to bignum in outreg
 166     #! 1 cell header, 1 cell length, 1 cell sign, + digits
 167     #! length is the # of digits + sign
 168     [
 169         "end" define-label
 170         ! Load cached zero value
 171         dst 0 >bignum %load-reference
 172         src 0 CMP
 173         ! Is it zero? Then just go to the end and return this zero
 174         "end" get JE
 175         ! Allocate a bignum
 176         dst 4 cells bignum temp %allot
 177         ! Write length
 178         dst 1 bignum@ 2 tag-fixnum MOV
 179         ! Store value
 180         dst 3 bignum@ src MOV
 181         ! Compute sign
 182         temp src MOV
 183         temp cell-bits 1 - SAR
 184         temp 1 AND
 185         ! Store sign
 186         dst 2 bignum@ temp MOV
 187         ! Make negative value positive
 188         temp temp ADD
 189         temp NEG
 190         temp 1 ADD
 191         src temp IMUL2
 192         ! Store the bignum
 193         dst 3 bignum@ temp MOV
 194         "end" resolve-label
 195     ] with-scope ;
 196
 197 M:: x86 %bignum>integer ( dst src temp -- )
 198     [
 199         "end" define-label
 200         ! load length
 201         temp src 1 bignum@ MOV
 202         ! if the length is 1, its just the sign and nothing else,
 203         ! so output 0
 204         dst 0 MOV
 205         temp 1 tag-fixnum CMP
 206         "end" get JE
 207         ! load the value
 208         dst src 3 bignum@ MOV
 209         ! load the sign
 210         temp src 2 bignum@ MOV
 211         ! convert it into -1 or 1
 212         temp temp ADD
 213         temp NEG
 214         temp 1 ADD
 215         ! make dst signed
 216         temp dst IMUL2
 217         "end" resolve-label
 218     ] with-scope ;
 219
 220 M: x86 %add-float nip ADDSD ;
 221 M: x86 %sub-float nip SUBSD ;
 222 M: x86 %mul-float nip MULSD ;
 223 M: x86 %div-float nip DIVSD ;
 224 M: x86 %min-float nip MINSD ;
 225 M: x86 %max-float nip MAXSD ;
 226 M: x86 %sqrt SQRTSD ;
 227
 228 M: x86 %single>double-float CVTSS2SD ;
 229 M: x86 %double>single-float CVTSD2SS ;
 230
 231 M: x86 %integer>float CVTSI2SD ;
 232 M: x86 %float>integer CVTTSD2SI ;
 233
 234 M: x86 %unbox-float ( dst src -- )
 235     float-offset [+] MOVSD ;
 236
 237 M:: x86 %box-float ( dst src temp -- )
 238     dst 16 float temp %allot
 239     dst float-offset [+] src MOVSD ;
 240
 241 M:: x86 %box-vector ( dst src rep temp -- )
 242     dst rep rep-size 2 cells + byte-array temp %allot
 243     16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm
 244     dst byte-array-offset [+]
 245     src rep copy-register ;
 246
 247 M:: x86 %unbox-vector ( dst src rep -- )
 248     dst src byte-array-offset [+]
 249     rep copy-register ;
 250
 251 M: x86 %broadcast-vector ( dst src rep -- )
 252     {
 253         { float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
 254         { double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
 255     } case ;
 256
 257 M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
 258     rep {
 259         {
 260             float-4-rep
 261             [
 262                 dst src1 MOVSS
 263                 dst src2 UNPCKLPS
 264                 src3 src4 UNPCKLPS
 265                 dst src3 MOVLHPS
 266             ]
 267         }
 268     } case ;
 269
 270 M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
 271     rep {
 272         {
 273             double-2-rep
 274             [
 275                 dst src1 MOVSD
 276                 dst src2 UNPCKLPD
 277             ]
 278         }
 279     } case ;
 280
 281 M: x86 %add-vector ( dst src1 src2 rep -- )
 282     {
 283         { float-4-rep [ ADDPS ] }
 284         { double-2-rep [ ADDPD ] }
 285         { char-16-rep [ PADDB ] }
 286         { uchar-16-rep [ PADDB ] }
 287         { short-8-rep [ PADDW ] }
 288         { ushort-8-rep [ PADDW ] }
 289         { int-4-rep [ PADDD ] }
 290         { uint-4-rep [ PADDD ] }
 291     } case drop ;
 292
 293 M: x86 %sub-vector ( dst src1 src2 rep -- )
 294     {
 295         { float-4-rep [ SUBPS ] }
 296         { double-2-rep [ SUBPD ] }
 297         { char-16-rep [ PSUBB ] }
 298         { uchar-16-rep [ PSUBB ] }
 299         { short-8-rep [ PSUBW ] }
 300         { ushort-8-rep [ PSUBW ] }
 301         { int-4-rep [ PSUBD ] }
 302         { uint-4-rep [ PSUBD ] }
 303     } case drop ;
 304
 305 M: x86 %mul-vector ( dst src1 src2 rep -- )
 306     {
 307         { float-4-rep [ MULPS ] }
 308         { double-2-rep [ MULPD ] }
 309         { int-4-rep [ PMULLW ] }
 310     } case drop ;
 311
 312 M: x86 %div-vector ( dst src1 src2 rep -- )
 313     {
 314         { float-4-rep [ DIVPS ] }
 315         { double-2-rep [ DIVPD ] }
 316     } case drop ;
 317
 318 M: x86 %min-vector ( dst src1 src2 rep -- )
 319     {
 320         { float-4-rep [ MINPS ] }
 321         { double-2-rep [ MINPD ] }
 322     } case drop ;
 323
 324 M: x86 %max-vector ( dst src1 src2 rep -- )
 325     {
 326         { float-4-rep [ MAXPS ] }
 327         { double-2-rep [ MAXPD ] }
 328     } case drop ;
 329
 330 M: x86 %sqrt-vector ( dst src rep -- )
 331     {
 332         { float-4-rep [ SQRTPS ] }
 333         { double-2-rep [ SQRTPD ] }
 334     } case ;
 335
 336 M: x86 %horizontal-add-vector ( dst src rep -- )
 337     {
 338         { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
 339         { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
 340     } case ;
 341
 342 M: x86 %unbox-alien ( dst src -- )
 343     alien-offset [+] MOV ;
 344
 345 M:: x86 %unbox-any-c-ptr ( dst src temp -- )
 346     [
 347         { "is-byte-array" "end" "start" } [ define-label ] each
 348         dst 0 MOV
 349         temp src MOV
 350         ! We come back here with displaced aliens
 351         "start" resolve-label
 352         ! Is the object f?
 353         temp \ f tag-number CMP
 354         "end" get JE
 355         ! Is the object an alien?
 356         temp header-offset [+] alien type-number tag-fixnum CMP
 357         "is-byte-array" get JNE
 358         ! If so, load the offset and add it to the address
 359         dst temp alien-offset [+] ADD
 360         ! Now recurse on the underlying alien
 361         temp temp underlying-alien-offset [+] MOV
 362         "start" get JMP
 363         "is-byte-array" resolve-label
 364         ! Add byte array address to address being computed
 365         dst temp ADD
 366         ! Add an offset to start of byte array's data
 367         dst byte-array-offset ADD
 368         "end" resolve-label
 369     ] with-scope ;
 370
 371 : alien@ ( reg n -- op ) cells alien tag-number - [+] ;
 372
 373 :: %allot-alien ( dst displacement base temp -- )
 374     dst 4 cells alien temp %allot
 375     dst 1 alien@ base MOV ! alien
 376     dst 2 alien@ \ f tag-number MOV ! expired
 377     dst 3 alien@ displacement MOV ! displacement
 378     ;
 379
 380 M:: x86 %box-alien ( dst src temp -- )
 381     [
 382         "end" define-label
 383         dst \ f tag-number MOV
 384         src 0 CMP
 385         "end" get JE
 386         dst src \ f tag-number temp %allot-alien
 387         "end" resolve-label
 388     ] with-scope ;
 389
 390 M:: x86 %box-displaced-alien ( dst displacement base displacement' base' base-class -- )
 391     [
 392         "end" define-label
 393         "ok" define-label
 394         ! If displacement is zero, return the base
 395         dst base MOV
 396         displacement 0 CMP
 397         "end" get JE
 398         ! Quickly use displacement' before its needed for real, as allot temporary
 399         dst 4 cells alien displacement' %allot
 400         ! If base is already a displaced alien, unpack it
 401         base' base MOV
 402         displacement' displacement MOV
 403         base \ f tag-number CMP
 404         "ok" get JE
 405         base header-offset [+] alien type-number tag-fixnum CMP
 406         "ok" get JNE
 407         ! displacement += base.displacement
 408         displacement' base 3 alien@ ADD
 409         ! base = base.base
 410         base' base 1 alien@ MOV
 411         "ok" resolve-label
 412         dst 1 alien@ base' MOV ! alien
 413         dst 2 alien@ \ f tag-number MOV ! expired
 414         dst 3 alien@ displacement' MOV ! displacement
 415         "end" resolve-label
 416     ] with-scope ;
 417
 418 ! The 'small-reg' mess is pretty crappy, but its only used on x86-32.
 419 ! On x86-64, all registers have 8-bit versions. However, a similar
 420 ! problem arises for shifts, where the shift count must be in CL, and
 421 ! so one day I will fix this properly by adding precoloring to the
 422 ! register allocator.
 423
 424 HOOK: has-small-reg? cpu ( reg size -- ? )
 425
 426 CONSTANT: have-byte-regs { EAX ECX EDX EBX }
 427
 428 M: x86.32 has-small-reg?
 429     {
 430         { 8 [ have-byte-regs memq? ] }
 431         { 16 [ drop t ] }
 432         { 32 [ drop t ] }
 433     } case ;
 434
 435 M: x86.64 has-small-reg? 2drop t ;
 436
 437 : small-reg-that-isn't ( exclude -- reg' )
 438     [ have-byte-regs ] dip
 439     [ native-version-of ] map
 440     '[ _ memq? not ] find nip ;
 441
 442 : with-save/restore ( reg quot -- )
 443     [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline
 444
 445 :: with-small-register ( dst exclude size quot: ( new-dst -- ) -- )
 446     ! If the destination register overlaps a small register with
 447     ! 'size' bits, we call the quot with that. Otherwise, we find a
 448     ! small register that is not in exclude, and call quot, saving and
 449     ! restoring the small register.
 450     dst size has-small-reg? [ dst quot call ] [
 451         exclude small-reg-that-isn't
 452         [ quot call ] with-save/restore
 453     ] if ; inline
 454
 455 : ?MOV ( dst src -- )
 456     2dup = [ 2drop ] [ MOV ] if ; inline
 457
 458 M:: x86 %string-nth ( dst src index temp -- )
 459     ! We request a small-reg of size 8 since those of size 16 are
 460     ! a superset.
 461     "end" define-label
 462     dst { src index temp } 8 [| new-dst |
 463         ! Load the least significant 7 bits into new-dst.
 464         ! 8th bit indicates whether we have to load from
 465         ! the aux vector or not.
 466         temp src index [+] LEA
 467         new-dst 8-bit-version-of temp string-offset [+] MOV
 468         new-dst new-dst 8-bit-version-of MOVZX
 469         ! Do we have to look at the aux vector?
 470         new-dst HEX: 80 CMP
 471         "end" get JL
 472         ! Yes, this is a non-ASCII character. Load aux vector
 473         temp src string-aux-offset [+] MOV
 474         new-dst temp XCHG
 475         ! Compute index
 476         new-dst index ADD
 477         new-dst index ADD
 478         ! Load high 16 bits
 479         new-dst 16-bit-version-of new-dst byte-array-offset [+] MOV
 480         new-dst new-dst 16-bit-version-of MOVZX
 481         new-dst 7 SHL
 482         ! Compute code point
 483         new-dst temp XOR
 484         "end" resolve-label
 485         dst new-dst ?MOV
 486     ] with-small-register ;
 487
 488 M:: x86 %set-string-nth-fast ( ch str index temp -- )
 489     ch { index str temp } 8 [| new-ch |
 490         new-ch ch ?MOV
 491         temp str index [+] LEA
 492         temp string-offset [+] new-ch 8-bit-version-of MOV
 493     ] with-small-register ;
 494
 495 :: %alien-integer-getter ( dst src size quot -- )
 496     dst { src } size [| new-dst |
 497         new-dst dup size n-bit-version-of dup src [] MOV
 498         quot call
 499         dst new-dst ?MOV
 500     ] with-small-register ; inline
 501
 502 : %alien-unsigned-getter ( dst src size -- )
 503     [ MOVZX ] %alien-integer-getter ; inline
 504
 505 M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ;
 506 M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ;
 507 M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ;
 508
 509 : %alien-signed-getter ( dst src size -- )
 510     [ MOVSX ] %alien-integer-getter ; inline
 511
 512 M: x86 %alien-signed-1 8 %alien-signed-getter ;
 513 M: x86 %alien-signed-2 16 %alien-signed-getter ;
 514 M: x86 %alien-signed-4 32 %alien-signed-getter ;
 515
 516 M: x86 %alien-cell [] MOV ;
 517 M: x86 %alien-float [] MOVSS ;
 518 M: x86 %alien-double [] MOVSD ;
 519 M: x86 %alien-vector [ [] ] dip copy-register ;
 520
 521 :: %alien-integer-setter ( ptr value size -- )
 522     value { ptr } size [| new-value |
 523         new-value value ?MOV
 524         ptr [] new-value size n-bit-version-of MOV
 525     ] with-small-register ; inline
 526
 527 M: x86 %set-alien-integer-1 8 %alien-integer-setter ;
 528 M: x86 %set-alien-integer-2 16 %alien-integer-setter ;
 529 M: x86 %set-alien-integer-4 32 %alien-integer-setter ;
 530 M: x86 %set-alien-cell [ [] ] dip MOV ;
 531 M: x86 %set-alien-float [ [] ] dip MOVSS ;
 532 M: x86 %set-alien-double [ [] ] dip MOVSD ;
 533 M: x86 %set-alien-vector [ [] ] 2dip copy-register ;
 534
 535 : shift-count? ( reg -- ? ) { ECX RCX } memq? ;
 536
 537 :: emit-shift ( dst src1 src2 quot -- )
 538     src2 shift-count? [
 539         dst CL quot call
 540     ] [
 541         dst shift-count? [
 542             dst src2 XCHG
 543             src2 CL quot call
 544             dst src2 XCHG
 545         ] [
 546             ECX native-version-of [
 547                 CL src2 MOV
 548                 drop dst CL quot call
 549             ] with-save/restore
 550         ] if
 551     ] if ; inline
 552
 553 M: x86 %shl [ SHL ] emit-shift ;
 554 M: x86 %shr [ SHR ] emit-shift ;
 555 M: x86 %sar [ SAR ] emit-shift ;
 556
 557 M: x86 %vm-field-ptr ( dst field -- )
 558     [ drop 0 MOV rc-absolute-cell rt-vm rel-fixup ]
 559     [ vm-field-offset ADD ] 2bi ;
 560
 561 : load-zone-ptr ( reg -- )
 562     #! Load pointer to start of zone array
 563     "nursery" %vm-field-ptr ;
 564
 565 : load-allot-ptr ( nursery-ptr allot-ptr -- )
 566     [ drop load-zone-ptr ] [ swap cell [+] MOV ] 2bi ;
 567
 568 : inc-allot-ptr ( nursery-ptr n -- )
 569     [ cell [+] ] dip 8 align ADD ;
 570
 571 : store-header ( temp class -- )
 572     [ [] ] [ type-number tag-fixnum ] bi* MOV ;
 573
 574 : store-tagged ( dst tag -- )
 575     tag-number OR ;
 576
 577 M:: x86 %allot ( dst size class nursery-ptr -- )
 578     nursery-ptr dst load-allot-ptr
 579     dst class store-header
 580     dst class store-tagged
 581     nursery-ptr size inc-allot-ptr ;
 582
 583
 584 M:: x86 %write-barrier ( src card# table -- )
 585     #! Mark the card pointed to by vreg.
 586     ! Mark the card
 587     card# src MOV
 588     card# card-bits SHR
 589     table "cards_offset" %vm-field-ptr
 590     table table [] MOV
 591     table card# [+] card-mark <byte> MOV
 592
 593     ! Mark the card deck
 594     card# deck-bits card-bits - SHR
 595     table "decks_offset" %vm-field-ptr
 596     table table [] MOV
 597     table card# [+] card-mark <byte> MOV ;
 598
 599 M:: x86 %check-nursery ( label temp1 temp2 -- )
 600     temp1 load-zone-ptr
 601     temp2 temp1 cell [+] MOV
 602     temp2 1024 ADD
 603     temp1 temp1 3 cells [+] MOV
 604     temp2 temp1 CMP
 605     label JLE ;
 606
 607 M: x86 %save-gc-root ( gc-root register -- ) [ gc-root@ ] dip MOV ;
 608
 609 M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ;
 610
 611 M:: x86 %call-gc ( gc-root-count -- )
 612     ! Pass pointer to start of GC roots as first parameter
 613     param-reg-1 gc-root-base param@ LEA
 614     ! Pass number of roots as second parameter
 615     param-reg-2 gc-root-count MOV
 616     ! Call GC
 617     "inline_gc" %vm-invoke-3rd-arg ;
 618
 619 M: x86 %alien-global ( dst symbol library -- )
 620     [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ;
 621
 622 M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
 623
 624 :: %boolean ( dst temp word -- )
 625     dst \ f tag-number MOV
 626     temp 0 MOV \ t rc-absolute-cell rel-immediate
 627     dst temp word execute ; inline
 628
 629 M:: x86 %compare ( dst src1 src2 cc temp -- )
 630     src1 src2 CMP
 631     cc order-cc {
 632         { cc<  [ dst temp \ CMOVL %boolean ] }
 633         { cc<= [ dst temp \ CMOVLE %boolean ] }
 634         { cc>  [ dst temp \ CMOVG %boolean ] }
 635         { cc>= [ dst temp \ CMOVGE %boolean ] }
 636         { cc=  [ dst temp \ CMOVE %boolean ] }
 637         { cc/= [ dst temp \ CMOVNE %boolean ] }
 638     } case ;
 639
 640 M: x86 %compare-imm ( dst src1 src2 cc temp -- )
 641     %compare ;
 642
 643 : %cmov-float= ( dst src -- )
 644     [
 645         "no-move" define-label
 646
 647         "no-move" get [ JNE ] [ JP ] bi
 648         MOV
 649         "no-move" resolve-label
 650     ] with-scope ;
 651
 652 : %cmov-float/= ( dst src -- )
 653     [
 654         "no-move" define-label
 655         "move" define-label
 656
 657         "move" get JP
 658         "no-move" get JE
 659         "move" resolve-label
 660         MOV
 661         "no-move" resolve-label
 662     ] with-scope ;
 663
 664 :: (%compare-float) ( dst src1 src2 cc temp compare -- )
 665     cc {
 666         { cc<    [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA  %boolean ] }
 667         { cc<=   [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
 668         { cc>    [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA  %boolean ] }
 669         { cc>=   [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
 670         { cc=    [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= %boolean ] }
 671         { cc<>   [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE %boolean ] }
 672         { cc<>=  [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP %boolean ] }
 673         { cc/<   [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
 674         { cc/<=  [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB  %boolean ] }
 675         { cc/>   [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
 676         { cc/>=  [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB  %boolean ] }
 677         { cc/=   [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= %boolean ] }
 678         { cc/<>  [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE  %boolean ] }
 679         { cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP  %boolean ] }
 680     } case ; inline
 681
 682 M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
 683     \ COMISD (%compare-float) ;
 684
 685 M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- )
 686     \ UCOMISD (%compare-float) ;
 687
 688 M:: x86 %compare-branch ( label src1 src2 cc -- )
 689     src1 src2 CMP
 690     cc order-cc {
 691         { cc<  [ label JL ] }
 692         { cc<= [ label JLE ] }
 693         { cc>  [ label JG ] }
 694         { cc>= [ label JGE ] }
 695         { cc=  [ label JE ] }
 696         { cc/= [ label JNE ] }
 697     } case ;
 698
 699 M: x86 %compare-imm-branch ( label src1 src2 cc -- )
 700     %compare-branch ;
 701
 702 : %jump-float= ( label -- )
 703     [
 704         "no-jump" define-label
 705         "no-jump" get JP
 706         JE
 707         "no-jump" resolve-label
 708     ] with-scope ;
 709
 710 : %jump-float/= ( label -- )
 711     [ JNE ] [ JP ] bi ;
 712
 713 :: (%compare-float-branch) ( label src1 src2 cc compare -- )
 714     cc {
 715         { cc<    [ src2 src1 \ compare execute( a b -- ) label JA  ] }
 716         { cc<=   [ src2 src1 \ compare execute( a b -- ) label JAE ] }
 717         { cc>    [ src1 src2 \ compare execute( a b -- ) label JA  ] }
 718         { cc>=   [ src1 src2 \ compare execute( a b -- ) label JAE ] }
 719         { cc=    [ src1 src2 \ compare execute( a b -- ) label %jump-float= ] }
 720         { cc<>   [ src1 src2 \ compare execute( a b -- ) label JNE ] }
 721         { cc<>=  [ src1 src2 \ compare execute( a b -- ) label JNP ] }
 722         { cc/<   [ src2 src1 \ compare execute( a b -- ) label JBE ] }
 723         { cc/<=  [ src2 src1 \ compare execute( a b -- ) label JB  ] }
 724         { cc/>   [ src1 src2 \ compare execute( a b -- ) label JBE ] }
 725         { cc/>=  [ src1 src2 \ compare execute( a b -- ) label JB  ] }
 726         { cc/=   [ src1 src2 \ compare execute( a b -- ) label %jump-float/= ] }
 727         { cc/<>  [ src1 src2 \ compare execute( a b -- ) label JE  ] }
 728         { cc/<>= [ src1 src2 \ compare execute( a b -- ) label JP  ] }
 729     } case ;
 730
 731 M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
 732     \ COMISD (%compare-float-branch) ;
 733
 734 M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
 735     \ UCOMISD (%compare-float-branch) ;
 736
 737 M:: x86 %spill ( src rep n -- )
 738     n spill@ src rep copy-register ;
 739
 740 M:: x86 %reload ( dst rep n -- )
 741     dst n spill@ rep copy-register ;
 742
 743 M: x86 %loop-entry 16 code-alignment [ NOP ] times ;
 744
 745 M:: x86 %save-context ( temp1 temp2 callback-allowed? -- )
 746     #! Save Factor stack pointers in case the C code calls a
 747     #! callback which does a GC, which must reliably trace
 748     #! all roots.
 749     temp1 0 MOV rc-absolute-cell rt-vm rel-fixup
 750     temp1 temp1 "stack_chain" vm-field-offset [+] MOV
 751     temp2 stack-reg cell neg [+] LEA
 752     temp1 [] temp2 MOV
 753     callback-allowed? [
 754         temp1 2 cells [+] ds-reg MOV
 755         temp1 3 cells [+] rs-reg MOV
 756     ] when ;
 757
 758 M: x86 value-struct? drop t ;
 759
 760 M: x86 small-enough? ( n -- ? )
 761     HEX: -80000000 HEX: 7fffffff between? ;
 762
 763 : next-stack@ ( n -- operand )
 764     #! nth parameter from the next stack frame. Used to box
 765     #! input values to callbacks; the callback has its own
 766     #! stack frame set up, and we want to read the frame
 767     #! set up by the caller.
 768     stack-frame get total-size>> + stack@ ;
 769
 770 : enable-sse2 ( -- )
 771     enable-float-intrinsics
 772     enable-fsqrt
 773     enable-float-min/max
 774     enable-sse2-simd ;
 775
 776 : enable-sse3 ( -- )
 777     enable-sse2
 778     enable-sse3-simd ;
 779
 780 enable-min/max
 781 enable-fixnum-log2