basis/cpu/x86/x86.factor

   1 ! Copyright (C) 2005, 2009 Slava Pestov.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: accessors assocs alien alien.c-types arrays strings
   4 cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
   5 cpu.architecture kernel kernel.private math memory namespaces make
   6 sequences words system layouts combinators math.order fry locals
   7 compiler.constants byte-arrays io macros quotations cpu.x86.features
   8 cpu.x86.features.private compiler compiler.units init
   9 compiler.cfg.registers
  10 compiler.cfg.instructions
  11 compiler.cfg.intrinsics
  12 compiler.cfg.comparisons
  13 compiler.cfg.stack-frame
  14 compiler.codegen
  15 compiler.codegen.fixup ;
  16 FROM: math => float ;
  17 IN: cpu.x86
  18
  19 << enable-fixnum-log2 >>
  20
  21 ! Add some methods to the assembler to be more useful to the backend
  22 M: label JMP 0 JMP rc-relative label-fixup ;
  23 M: label JUMPcc [ 0 ] dip JUMPcc rc-relative label-fixup ;
  24
  25 M: x86 two-operand? t ;
  26
  27 HOOK: stack-reg cpu ( -- reg )
  28
  29 HOOK: reserved-area-size cpu ( -- n )
  30
  31 : stack@ ( n -- op ) stack-reg swap [+] ;
  32
  33 : param@ ( n -- op ) reserved-area-size + stack@ ;
  34
  35 : spill@ ( n -- op ) spill-offset param@ ;
  36
  37 : gc-root@ ( n -- op ) gc-root-offset param@ ;
  38
  39 : decr-stack-reg ( n -- )
  40     dup 0 = [ drop ] [ stack-reg swap SUB ] if ;
  41
  42 : incr-stack-reg ( n -- )
  43     dup 0 = [ drop ] [ stack-reg swap ADD ] if ;
  44
  45 : align-stack ( n -- n' )
  46     os macosx? cpu x86.64? or [ 16 align ] when ;
  47
  48 M: x86 stack-frame-size ( stack-frame -- i )
  49     (stack-frame-size) 3 cells reserved-area-size + + align-stack ;
  50
  51 ! Must be a volatile register not used for parameter passing, for safe
  52 ! use in calls in and out of C
  53 HOOK: temp-reg cpu ( -- reg )
  54
  55 ! Fastcall calling convention
  56 HOOK: param-reg-1 cpu ( -- reg )
  57 HOOK: param-reg-2 cpu ( -- reg )
  58
  59 HOOK: pic-tail-reg cpu ( -- reg )
  60
  61 M: x86 %load-immediate dup 0 = [ drop dup XOR ] [ MOV ] if ;
  62
  63 M: x86 %load-reference swap 0 MOV rc-absolute-cell rel-immediate ;
  64
  65 HOOK: ds-reg cpu ( -- reg )
  66 HOOK: rs-reg cpu ( -- reg )
  67
  68 : reg-stack ( n reg -- op ) swap cells neg [+] ;
  69
  70 GENERIC: loc>operand ( loc -- operand )
  71
  72 M: ds-loc loc>operand n>> ds-reg reg-stack ;
  73 M: rs-loc loc>operand n>> rs-reg reg-stack ;
  74
  75 M: x86 %peek loc>operand MOV ;
  76 M: x86 %replace loc>operand swap MOV ;
  77 : (%inc) ( n reg -- ) swap cells dup 0 > [ ADD ] [ neg SUB ] if ; inline
  78 M: x86 %inc-d ( n -- ) ds-reg (%inc) ;
  79 M: x86 %inc-r ( n -- ) rs-reg (%inc) ;
  80
  81 M: x86 %call ( word -- ) 0 CALL rc-relative rel-word-pic ;
  82
  83 : xt-tail-pic-offset ( -- n )
  84     #! See the comment in vm/cpu-x86.hpp
  85     cell 4 + 1 + ; inline
  86
  87 M: x86 %jump ( word -- )
  88     pic-tail-reg 0 MOV xt-tail-pic-offset rc-absolute-cell rel-here
  89     0 JMP rc-relative rel-word-pic-tail ;
  90
  91 M: x86 %jump-label ( label -- ) 0 JMP rc-relative label-fixup ;
  92
  93 M: x86 %return ( -- ) 0 RET ;
  94
  95 : code-alignment ( align -- n )
  96     [ building get length dup ] dip align swap - ;
  97
  98 : align-code ( n -- )
  99     0 <repetition> % ;
 100
 101 :: (%slot) ( obj slot tag temp -- op )
 102     temp slot obj [+] LEA
 103     temp tag neg [+] ; inline
 104
 105 :: (%slot-imm) ( obj slot tag -- op )
 106     obj slot cells tag - [+] ; inline
 107
 108 M: x86 %slot ( dst obj slot tag temp -- ) (%slot) MOV ;
 109 M: x86 %slot-imm ( dst obj slot tag -- ) (%slot-imm) MOV ;
 110 M: x86 %set-slot ( src obj slot tag temp -- ) (%slot) swap MOV ;
 111 M: x86 %set-slot-imm ( src obj slot tag -- ) (%slot-imm) swap MOV ;
 112
 113 M: x86 %add     2over eq? [ nip ADD ] [ [+] LEA ] if ;
 114 M: x86 %add-imm 2over eq? [ nip ADD ] [ [+] LEA ] if ;
 115 M: x86 %sub     nip SUB ;
 116 M: x86 %sub-imm 2over eq? [ nip SUB ] [ neg [+] LEA ] if ;
 117 M: x86 %mul     nip swap IMUL2 ;
 118 M: x86 %mul-imm IMUL3 ;
 119 M: x86 %and     nip AND ;
 120 M: x86 %and-imm nip AND ;
 121 M: x86 %or      nip OR ;
 122 M: x86 %or-imm  nip OR ;
 123 M: x86 %xor     nip XOR ;
 124 M: x86 %xor-imm nip XOR ;
 125 M: x86 %shl-imm nip SHL ;
 126 M: x86 %shr-imm nip SHR ;
 127 M: x86 %sar-imm nip SAR ;
 128
 129 M: x86 %min     nip [ CMP ] [ CMOVG ] 2bi ;
 130 M: x86 %max     nip [ CMP ] [ CMOVL ] 2bi ;
 131
 132 M: x86 %not     drop NOT ;
 133 M: x86 %log2    BSR ;
 134
 135 GENERIC: copy-register* ( dst src rep -- )
 136
 137 M: int-rep copy-register* drop MOV ;
 138 M: tagged-rep copy-register* drop MOV ;
 139 M: float-rep copy-register* drop MOVSS ;
 140 M: double-rep copy-register* drop MOVSD ;
 141 M: float-4-rep copy-register* drop MOVUPS ;
 142 M: double-2-rep copy-register* drop MOVUPD ;
 143 M: vector-rep copy-register* drop MOVDQU ;
 144
 145 : copy-register ( dst src rep -- )
 146     2over eq? [ 3drop ] [ copy-register* ] if ;
 147
 148 M: x86 %copy ( dst src rep -- ) copy-register ;
 149
 150 :: overflow-template ( label dst src1 src2 insn -- )
 151     src1 src2 insn call
 152     label JO ; inline
 153
 154 M: x86 %fixnum-add ( label dst src1 src2 -- )
 155     [ ADD ] overflow-template ;
 156
 157 M: x86 %fixnum-sub ( label dst src1 src2 -- )
 158     [ SUB ] overflow-template ;
 159
 160 M: x86 %fixnum-mul ( label dst src1 src2 -- )
 161     [ swap IMUL2 ] overflow-template ;
 162
 163 : bignum@ ( reg n -- op )
 164     cells bignum tag-number - [+] ; inline
 165
 166 M:: x86 %integer>bignum ( dst src temp -- )
 167     #! on entry, inreg is a signed 32-bit quantity
 168     #! exits with tagged ptr to bignum in outreg
 169     #! 1 cell header, 1 cell length, 1 cell sign, + digits
 170     #! length is the # of digits + sign
 171     [
 172         "end" define-label
 173         ! Load cached zero value
 174         dst 0 >bignum %load-reference
 175         src 0 CMP
 176         ! Is it zero? Then just go to the end and return this zero
 177         "end" get JE
 178         ! Allocate a bignum
 179         dst 4 cells bignum temp %allot
 180         ! Write length
 181         dst 1 bignum@ 2 tag-fixnum MOV
 182         ! Store value
 183         dst 3 bignum@ src MOV
 184         ! Compute sign
 185         temp src MOV
 186         temp cell-bits 1 - SAR
 187         temp 1 AND
 188         ! Store sign
 189         dst 2 bignum@ temp MOV
 190         ! Make negative value positive
 191         temp temp ADD
 192         temp NEG
 193         temp 1 ADD
 194         src temp IMUL2
 195         ! Store the bignum
 196         dst 3 bignum@ temp MOV
 197         "end" resolve-label
 198     ] with-scope ;
 199
 200 M:: x86 %bignum>integer ( dst src temp -- )
 201     [
 202         "end" define-label
 203         ! load length
 204         temp src 1 bignum@ MOV
 205         ! if the length is 1, its just the sign and nothing else,
 206         ! so output 0
 207         dst 0 MOV
 208         temp 1 tag-fixnum CMP
 209         "end" get JE
 210         ! load the value
 211         dst src 3 bignum@ MOV
 212         ! load the sign
 213         temp src 2 bignum@ MOV
 214         ! convert it into -1 or 1
 215         temp temp ADD
 216         temp NEG
 217         temp 1 ADD
 218         ! make dst signed
 219         temp dst IMUL2
 220         "end" resolve-label
 221     ] with-scope ;
 222
 223 M: x86 %add-float nip ADDSD ;
 224 M: x86 %sub-float nip SUBSD ;
 225 M: x86 %mul-float nip MULSD ;
 226 M: x86 %div-float nip DIVSD ;
 227 M: x86 %min-float nip MINSD ;
 228 M: x86 %max-float nip MAXSD ;
 229 M: x86 %sqrt SQRTSD ;
 230
 231 M: x86 %single>double-float CVTSS2SD ;
 232 M: x86 %double>single-float CVTSD2SS ;
 233
 234 M: x86 %integer>float CVTSI2SD ;
 235 M: x86 %float>integer CVTTSD2SI ;
 236
 237 M: x86 %unbox-float ( dst src -- )
 238     float-offset [+] MOVSD ;
 239
 240 M:: x86 %box-float ( dst src temp -- )
 241     dst 16 float temp %allot
 242     dst float-offset [+] src MOVSD ;
 243
 244 M:: x86 %box-vector ( dst src rep temp -- )
 245     dst rep rep-size 2 cells + byte-array temp %allot
 246     16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm
 247     dst byte-array-offset [+]
 248     src rep copy-register ;
 249
 250 M:: x86 %unbox-vector ( dst src rep -- )
 251     dst src byte-array-offset [+]
 252     rep copy-register ;
 253
 254 MACRO: available-reps ( alist -- )
 255     ! Each SSE version adds new representations and supports
 256     ! all old ones
 257     unzip { } [ append ] accumulate rest swap suffix
 258     [ [ 1quotation ] map ] bi@ zip
 259     reverse [ { } ] suffix
 260     '[ _ cond ] ;
 261
 262 M: x86 %broadcast-vector ( dst src rep -- )
 263     {
 264         { float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
 265         { double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
 266     } case ;
 267
 268 M: x86 %broadcast-vector-reps
 269     {
 270         { sse? { float-4-rep } }
 271         { sse2? { double-2-rep } }
 272     } available-reps ;
 273
 274 M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
 275     rep {
 276         {
 277             float-4-rep
 278             [
 279                 dst src1 MOVSS
 280                 dst src2 UNPCKLPS
 281                 src3 src4 UNPCKLPS
 282                 dst src3 MOVLHPS
 283             ]
 284         }
 285     } case ;
 286
 287 M: x86 %gather-vector-4-reps
 288     {
 289         { sse? { float-4-rep } }
 290     } available-reps ;
 291
 292 M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
 293     rep {
 294         {
 295             double-2-rep
 296             [
 297                 dst src1 MOVSD
 298                 dst src2 UNPCKLPD
 299             ]
 300         }
 301     } case ;
 302
 303 M: x86 %gather-vector-2-reps
 304     {
 305         { sse2? { double-2-rep } }
 306     } available-reps ;
 307
 308 M: x86 %add-vector ( dst src1 src2 rep -- )
 309     {
 310         { float-4-rep [ ADDPS ] }
 311         { double-2-rep [ ADDPD ] }
 312         { char-16-rep [ PADDB ] }
 313         { uchar-16-rep [ PADDB ] }
 314         { short-8-rep [ PADDW ] }
 315         { ushort-8-rep [ PADDW ] }
 316         { int-4-rep [ PADDD ] }
 317         { uint-4-rep [ PADDD ] }
 318     } case drop ;
 319
 320 M: x86 %add-vector-reps
 321     {
 322         { sse? { float-4-rep } }
 323         { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
 324     } available-reps ;
 325
 326 M: x86 %sub-vector ( dst src1 src2 rep -- )
 327     {
 328         { float-4-rep [ SUBPS ] }
 329         { double-2-rep [ SUBPD ] }
 330         { char-16-rep [ PSUBB ] }
 331         { uchar-16-rep [ PSUBB ] }
 332         { short-8-rep [ PSUBW ] }
 333         { ushort-8-rep [ PSUBW ] }
 334         { int-4-rep [ PSUBD ] }
 335         { uint-4-rep [ PSUBD ] }
 336     } case drop ;
 337
 338 M: x86 %sub-vector-reps
 339     {
 340         { sse? { float-4-rep } }
 341         { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
 342     } available-reps ;
 343
 344 M: x86 %mul-vector ( dst src1 src2 rep -- )
 345     {
 346         { float-4-rep [ MULPS ] }
 347         { double-2-rep [ MULPD ] }
 348         { short-8-rep [ PMULLW ] }
 349         { ushort-8-rep [ PMULLW ] }
 350         { int-4-rep [ PMULLD ] }
 351         { uint-4-rep [ PMULLD ] }
 352     } case drop ;
 353
 354 M: x86 %mul-vector-reps
 355     {
 356         { sse? { float-4-rep } }
 357         { sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
 358     } available-reps ;
 359
 360 M: x86 %div-vector ( dst src1 src2 rep -- )
 361     {
 362         { float-4-rep [ DIVPS ] }
 363         { double-2-rep [ DIVPD ] }
 364     } case drop ;
 365
 366 M: x86 %div-vector-reps
 367     {
 368         { sse? { float-4-rep } }
 369         { sse2? { double-2-rep } }
 370     } available-reps ;
 371
 372 M: x86 %min-vector ( dst src1 src2 rep -- )
 373     {
 374         { float-4-rep [ MINPS ] }
 375         { double-2-rep [ MINPD ] }
 376         { uchar-16-rep [ PMINUB ] }
 377         { short-8-rep [ PMINSW ] }
 378     } case drop ;
 379
 380 M: x86 %min-vector-reps
 381     {
 382         { sse? { float-4-rep } }
 383         { sse2? { double-2-rep short-8-rep uchar-16-rep } }
 384     } available-reps ;
 385
 386 M: x86 %max-vector ( dst src1 src2 rep -- )
 387     {
 388         { float-4-rep [ MAXPS ] }
 389         { double-2-rep [ MAXPD ] }
 390         { uchar-16-rep [ PMAXUB ] }
 391         { short-8-rep [ PMAXSW ] }
 392     } case drop ;
 393
 394 M: x86 %max-vector-reps
 395     {
 396         { sse? { float-4-rep } }
 397         { sse2? { double-2-rep short-8-rep uchar-16-rep } }
 398     } available-reps ;
 399
 400 M: x86 %sqrt-vector ( dst src rep -- )
 401     {
 402         { float-4-rep [ SQRTPS ] }
 403         { double-2-rep [ SQRTPD ] }
 404     } case ;
 405
 406 M: x86 %sqrt-vector-reps
 407     {
 408         { sse? { float-4-rep } }
 409         { sse2? { double-2-rep } }
 410     } available-reps ;
 411
 412 M: x86 %horizontal-add-vector ( dst src rep -- )
 413     {
 414         { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
 415         { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
 416     } case ;
 417
 418 M: x86 %horizontal-add-vector-reps
 419     {
 420         { sse3? { float-4-rep double-2-rep } }
 421     } available-reps ;
 422
 423 M: x86 %unbox-alien ( dst src -- )
 424     alien-offset [+] MOV ;
 425
 426 M:: x86 %unbox-any-c-ptr ( dst src temp -- )
 427     [
 428         { "is-byte-array" "end" "start" } [ define-label ] each
 429         dst 0 MOV
 430         temp src MOV
 431         ! We come back here with displaced aliens
 432         "start" resolve-label
 433         ! Is the object f?
 434         temp \ f tag-number CMP
 435         "end" get JE
 436         ! Is the object an alien?
 437         temp header-offset [+] alien type-number tag-fixnum CMP
 438         "is-byte-array" get JNE
 439         ! If so, load the offset and add it to the address
 440         dst temp alien-offset [+] ADD
 441         ! Now recurse on the underlying alien
 442         temp temp underlying-alien-offset [+] MOV
 443         "start" get JMP
 444         "is-byte-array" resolve-label
 445         ! Add byte array address to address being computed
 446         dst temp ADD
 447         ! Add an offset to start of byte array's data
 448         dst byte-array-offset ADD
 449         "end" resolve-label
 450     ] with-scope ;
 451
 452 : alien@ ( reg n -- op ) cells alien tag-number - [+] ;
 453
 454 :: %allot-alien ( dst displacement base temp -- )
 455     dst 4 cells alien temp %allot
 456     dst 1 alien@ base MOV ! alien
 457     dst 2 alien@ \ f tag-number MOV ! expired
 458     dst 3 alien@ displacement MOV ! displacement
 459     ;
 460
 461 M:: x86 %box-alien ( dst src temp -- )
 462     [
 463         "end" define-label
 464         dst \ f tag-number MOV
 465         src 0 CMP
 466         "end" get JE
 467         dst src \ f tag-number temp %allot-alien
 468         "end" resolve-label
 469     ] with-scope ;
 470
 471 M:: x86 %box-displaced-alien ( dst displacement base displacement' base' base-class -- )
 472     [
 473         "end" define-label
 474         "ok" define-label
 475         ! If displacement is zero, return the base
 476         dst base MOV
 477         displacement 0 CMP
 478         "end" get JE
 479         ! Quickly use displacement' before its needed for real, as allot temporary
 480         dst 4 cells alien displacement' %allot
 481         ! If base is already a displaced alien, unpack it
 482         base' base MOV
 483         displacement' displacement MOV
 484         base \ f tag-number CMP
 485         "ok" get JE
 486         base header-offset [+] alien type-number tag-fixnum CMP
 487         "ok" get JNE
 488         ! displacement += base.displacement
 489         displacement' base 3 alien@ ADD
 490         ! base = base.base
 491         base' base 1 alien@ MOV
 492         "ok" resolve-label
 493         dst 1 alien@ base' MOV ! alien
 494         dst 2 alien@ \ f tag-number MOV ! expired
 495         dst 3 alien@ displacement' MOV ! displacement
 496         "end" resolve-label
 497     ] with-scope ;
 498
 499 ! The 'small-reg' mess is pretty crappy, but its only used on x86-32.
 500 ! On x86-64, all registers have 8-bit versions. However, a similar
 501 ! problem arises for shifts, where the shift count must be in CL, and
 502 ! so one day I will fix this properly by adding precoloring to the
 503 ! register allocator.
 504
 505 HOOK: has-small-reg? cpu ( reg size -- ? )
 506
 507 CONSTANT: have-byte-regs { EAX ECX EDX EBX }
 508
 509 M: x86.32 has-small-reg?
 510     {
 511         { 8 [ have-byte-regs memq? ] }
 512         { 16 [ drop t ] }
 513         { 32 [ drop t ] }
 514     } case ;
 515
 516 M: x86.64 has-small-reg? 2drop t ;
 517
 518 : small-reg-that-isn't ( exclude -- reg' )
 519     [ have-byte-regs ] dip
 520     [ native-version-of ] map
 521     '[ _ memq? not ] find nip ;
 522
 523 : with-save/restore ( reg quot -- )
 524     [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline
 525
 526 :: with-small-register ( dst exclude size quot: ( new-dst -- ) -- )
 527     ! If the destination register overlaps a small register with
 528     ! 'size' bits, we call the quot with that. Otherwise, we find a
 529     ! small register that is not in exclude, and call quot, saving and
 530     ! restoring the small register.
 531     dst size has-small-reg? [ dst quot call ] [
 532         exclude small-reg-that-isn't
 533         [ quot call ] with-save/restore
 534     ] if ; inline
 535
 536 : ?MOV ( dst src -- )
 537     2dup = [ 2drop ] [ MOV ] if ; inline
 538
 539 M:: x86 %string-nth ( dst src index temp -- )
 540     ! We request a small-reg of size 8 since those of size 16 are
 541     ! a superset.
 542     "end" define-label
 543     dst { src index temp } 8 [| new-dst |
 544         ! Load the least significant 7 bits into new-dst.
 545         ! 8th bit indicates whether we have to load from
 546         ! the aux vector or not.
 547         temp src index [+] LEA
 548         new-dst 8-bit-version-of temp string-offset [+] MOV
 549         new-dst new-dst 8-bit-version-of MOVZX
 550         ! Do we have to look at the aux vector?
 551         new-dst HEX: 80 CMP
 552         "end" get JL
 553         ! Yes, this is a non-ASCII character. Load aux vector
 554         temp src string-aux-offset [+] MOV
 555         new-dst temp XCHG
 556         ! Compute index
 557         new-dst index ADD
 558         new-dst index ADD
 559         ! Load high 16 bits
 560         new-dst 16-bit-version-of new-dst byte-array-offset [+] MOV
 561         new-dst new-dst 16-bit-version-of MOVZX
 562         new-dst 7 SHL
 563         ! Compute code point
 564         new-dst temp XOR
 565         "end" resolve-label
 566         dst new-dst ?MOV
 567     ] with-small-register ;
 568
 569 M:: x86 %set-string-nth-fast ( ch str index temp -- )
 570     ch { index str temp } 8 [| new-ch |
 571         new-ch ch ?MOV
 572         temp str index [+] LEA
 573         temp string-offset [+] new-ch 8-bit-version-of MOV
 574     ] with-small-register ;
 575
 576 :: %alien-integer-getter ( dst src size quot -- )
 577     dst { src } size [| new-dst |
 578         new-dst dup size n-bit-version-of dup src [] MOV
 579         quot call
 580         dst new-dst ?MOV
 581     ] with-small-register ; inline
 582
 583 : %alien-unsigned-getter ( dst src size -- )
 584     [ MOVZX ] %alien-integer-getter ; inline
 585
 586 M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ;
 587 M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ;
 588 M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ;
 589
 590 : %alien-signed-getter ( dst src size -- )
 591     [ MOVSX ] %alien-integer-getter ; inline
 592
 593 M: x86 %alien-signed-1 8 %alien-signed-getter ;
 594 M: x86 %alien-signed-2 16 %alien-signed-getter ;
 595 M: x86 %alien-signed-4 32 %alien-signed-getter ;
 596
 597 M: x86 %alien-cell [] MOV ;
 598 M: x86 %alien-float [] MOVSS ;
 599 M: x86 %alien-double [] MOVSD ;
 600 M: x86 %alien-vector [ [] ] dip copy-register ;
 601
 602 :: %alien-integer-setter ( ptr value size -- )
 603     value { ptr } size [| new-value |
 604         new-value value ?MOV
 605         ptr [] new-value size n-bit-version-of MOV
 606     ] with-small-register ; inline
 607
 608 M: x86 %set-alien-integer-1 8 %alien-integer-setter ;
 609 M: x86 %set-alien-integer-2 16 %alien-integer-setter ;
 610 M: x86 %set-alien-integer-4 32 %alien-integer-setter ;
 611 M: x86 %set-alien-cell [ [] ] dip MOV ;
 612 M: x86 %set-alien-float [ [] ] dip MOVSS ;
 613 M: x86 %set-alien-double [ [] ] dip MOVSD ;
 614 M: x86 %set-alien-vector [ [] ] 2dip copy-register ;
 615
 616 : shift-count? ( reg -- ? ) { ECX RCX } memq? ;
 617
 618 :: emit-shift ( dst src1 src2 quot -- )
 619     src2 shift-count? [
 620         dst CL quot call
 621     ] [
 622         dst shift-count? [
 623             dst src2 XCHG
 624             src2 CL quot call
 625             dst src2 XCHG
 626         ] [
 627             ECX native-version-of [
 628                 CL src2 MOV
 629                 drop dst CL quot call
 630             ] with-save/restore
 631         ] if
 632     ] if ; inline
 633
 634 M: x86 %shl [ SHL ] emit-shift ;
 635 M: x86 %shr [ SHR ] emit-shift ;
 636 M: x86 %sar [ SAR ] emit-shift ;
 637
 638 : load-zone-ptr ( reg -- )
 639     #! Load pointer to start of zone array
 640     0 MOV "nursery" f rc-absolute-cell rel-dlsym ;
 641
 642 : load-allot-ptr ( nursery-ptr allot-ptr -- )
 643     [ drop load-zone-ptr ] [ swap cell [+] MOV ] 2bi ;
 644
 645 : inc-allot-ptr ( nursery-ptr n -- )
 646     [ cell [+] ] dip 8 align ADD ;
 647
 648 : store-header ( temp class -- )
 649     [ [] ] [ type-number tag-fixnum ] bi* MOV ;
 650
 651 : store-tagged ( dst tag -- )
 652     tag-number OR ;
 653
 654 M:: x86 %allot ( dst size class nursery-ptr -- )
 655     nursery-ptr dst load-allot-ptr
 656     dst class store-header
 657     dst class store-tagged
 658     nursery-ptr size inc-allot-ptr ;
 659
 660 M:: x86 %write-barrier ( src card# table -- )
 661     #! Mark the card pointed to by vreg.
 662     ! Mark the card
 663     card# src MOV
 664     card# card-bits SHR
 665     table "cards_offset" f %alien-global
 666     table table [] MOV
 667     table card# [+] card-mark <byte> MOV
 668
 669     ! Mark the card deck
 670     card# deck-bits card-bits - SHR
 671     table "decks_offset" f %alien-global
 672     table table [] MOV
 673     table card# [+] card-mark <byte> MOV ;
 674
 675 M:: x86 %check-nursery ( label temp1 temp2 -- )
 676     temp1 load-zone-ptr
 677     temp2 temp1 cell [+] MOV
 678     temp2 1024 ADD
 679     temp1 temp1 3 cells [+] MOV
 680     temp2 temp1 CMP
 681     label JLE ;
 682
 683 M: x86 %save-gc-root ( gc-root register -- ) [ gc-root@ ] dip MOV ;
 684
 685 M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ;
 686
 687 M:: x86 %call-gc ( gc-root-count -- )
 688     ! Pass pointer to start of GC roots as first parameter
 689     param-reg-1 gc-root-base param@ LEA
 690     ! Pass number of roots as second parameter
 691     param-reg-2 gc-root-count MOV
 692     ! Call GC
 693     "inline_gc" f %alien-invoke ;
 694
 695 M: x86 %alien-global
 696     [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ;
 697
 698 M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
 699
 700 :: %boolean ( dst temp word -- )
 701     dst \ f tag-number MOV
 702     temp 0 MOV \ t rc-absolute-cell rel-immediate
 703     dst temp word execute ; inline
 704
 705 M:: x86 %compare ( dst src1 src2 cc temp -- )
 706     src1 src2 CMP
 707     cc order-cc {
 708         { cc<  [ dst temp \ CMOVL %boolean ] }
 709         { cc<= [ dst temp \ CMOVLE %boolean ] }
 710         { cc>  [ dst temp \ CMOVG %boolean ] }
 711         { cc>= [ dst temp \ CMOVGE %boolean ] }
 712         { cc=  [ dst temp \ CMOVE %boolean ] }
 713         { cc/= [ dst temp \ CMOVNE %boolean ] }
 714     } case ;
 715
 716 M: x86 %compare-imm ( dst src1 src2 cc temp -- )
 717     %compare ;
 718
 719 : %cmov-float= ( dst src -- )
 720     [
 721         "no-move" define-label
 722
 723         "no-move" get [ JNE ] [ JP ] bi
 724         MOV
 725         "no-move" resolve-label
 726     ] with-scope ;
 727
 728 : %cmov-float/= ( dst src -- )
 729     [
 730         "no-move" define-label
 731         "move" define-label
 732
 733         "move" get JP
 734         "no-move" get JE
 735         "move" resolve-label
 736         MOV
 737         "no-move" resolve-label
 738     ] with-scope ;
 739
 740 :: (%compare-float) ( dst src1 src2 cc temp compare -- )
 741     cc {
 742         { cc<    [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA  %boolean ] }
 743         { cc<=   [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
 744         { cc>    [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA  %boolean ] }
 745         { cc>=   [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
 746         { cc=    [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= %boolean ] }
 747         { cc<>   [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE %boolean ] }
 748         { cc<>=  [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP %boolean ] }
 749         { cc/<   [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
 750         { cc/<=  [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB  %boolean ] }
 751         { cc/>   [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
 752         { cc/>=  [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB  %boolean ] }
 753         { cc/=   [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= %boolean ] }
 754         { cc/<>  [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE  %boolean ] }
 755         { cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP  %boolean ] }
 756     } case ; inline
 757
 758 M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
 759     \ COMISD (%compare-float) ;
 760
 761 M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- )
 762     \ UCOMISD (%compare-float) ;
 763
 764 M:: x86 %compare-branch ( label src1 src2 cc -- )
 765     src1 src2 CMP
 766     cc order-cc {
 767         { cc<  [ label JL ] }
 768         { cc<= [ label JLE ] }
 769         { cc>  [ label JG ] }
 770         { cc>= [ label JGE ] }
 771         { cc=  [ label JE ] }
 772         { cc/= [ label JNE ] }
 773     } case ;
 774
 775 M: x86 %compare-imm-branch ( label src1 src2 cc -- )
 776     %compare-branch ;
 777
 778 : %jump-float= ( label -- )
 779     [
 780         "no-jump" define-label
 781         "no-jump" get JP
 782         JE
 783         "no-jump" resolve-label
 784     ] with-scope ;
 785
 786 : %jump-float/= ( label -- )
 787     [ JNE ] [ JP ] bi ;
 788
 789 :: (%compare-float-branch) ( label src1 src2 cc compare -- )
 790     cc {
 791         { cc<    [ src2 src1 \ compare execute( a b -- ) label JA  ] }
 792         { cc<=   [ src2 src1 \ compare execute( a b -- ) label JAE ] }
 793         { cc>    [ src1 src2 \ compare execute( a b -- ) label JA  ] }
 794         { cc>=   [ src1 src2 \ compare execute( a b -- ) label JAE ] }
 795         { cc=    [ src1 src2 \ compare execute( a b -- ) label %jump-float= ] }
 796         { cc<>   [ src1 src2 \ compare execute( a b -- ) label JNE ] }
 797         { cc<>=  [ src1 src2 \ compare execute( a b -- ) label JNP ] }
 798         { cc/<   [ src2 src1 \ compare execute( a b -- ) label JBE ] }
 799         { cc/<=  [ src2 src1 \ compare execute( a b -- ) label JB  ] }
 800         { cc/>   [ src1 src2 \ compare execute( a b -- ) label JBE ] }
 801         { cc/>=  [ src1 src2 \ compare execute( a b -- ) label JB  ] }
 802         { cc/=   [ src1 src2 \ compare execute( a b -- ) label %jump-float/= ] }
 803         { cc/<>  [ src1 src2 \ compare execute( a b -- ) label JE  ] }
 804         { cc/<>= [ src1 src2 \ compare execute( a b -- ) label JP  ] }
 805     } case ;
 806
 807 M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
 808     \ COMISD (%compare-float-branch) ;
 809
 810 M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
 811     \ UCOMISD (%compare-float-branch) ;
 812
 813 M:: x86 %spill ( src rep n -- )
 814     n spill@ src rep copy-register ;
 815
 816 M:: x86 %reload ( dst rep n -- )
 817     dst n spill@ rep copy-register ;
 818
 819 M: x86 %loop-entry 16 code-alignment [ NOP ] times ;
 820
 821 M:: x86 %save-context ( temp1 temp2 callback-allowed? -- )
 822     #! Save Factor stack pointers in case the C code calls a
 823     #! callback which does a GC, which must reliably trace
 824     #! all roots.
 825     temp1 "stack_chain" f %alien-global
 826     temp1 temp1 [] MOV
 827     temp2 stack-reg cell neg [+] LEA
 828     temp1 [] temp2 MOV
 829     callback-allowed? [
 830         temp1 2 cells [+] ds-reg MOV
 831         temp1 3 cells [+] rs-reg MOV
 832     ] when ;
 833
 834 M: x86 value-struct? drop t ;
 835
 836 M: x86 small-enough? ( n -- ? )
 837     HEX: -80000000 HEX: 7fffffff between? ;
 838
 839 : next-stack@ ( n -- operand )
 840     #! nth parameter from the next stack frame. Used to box
 841     #! input values to callbacks; the callback has its own
 842     #! stack frame set up, and we want to read the frame
 843     #! set up by the caller.
 844     stack-frame get total-size>> + stack@ ;
 845
 846 : enable-sse2 ( -- )
 847     enable-float-intrinsics
 848     enable-fsqrt
 849     enable-float-min/max
 850     enable-sse2-simd ;
 851
 852 : enable-sse3 ( -- )
 853     enable-sse2
 854     enable-sse3-simd ;
 855
 856 enable-min/max
 857
 858 :: install-sse-check ( version -- )
 859     [
 860         sse-version version < [
 861             "This image was built to use " write
 862             version sse-string write
 863             " but your CPU only supports " write
 864             sse-version sse-string write "." print
 865             "You will need to bootstrap Factor again." print
 866             flush
 867             1 exit
 868         ] when
 869     ] "cpu.x86" add-init-hook ;
 870
 871 : enable-sse ( version -- )
 872     {
 873         { 00 [ ] }
 874         { 10 [ ] }
 875         { 20 [ enable-sse2 ] }
 876         { 30 [ enable-sse3 ] }
 877         { 33 [ enable-sse3 ] }
 878         { 41 [ enable-sse3 ] }
 879         { 42 [ enable-sse3 ] }
 880     } case ;
 881
 882 [ { sse_version } compile ] with-optimizer
 883
 884 "Checking for multimedia extensions: " write sse-version 30 min
 885 [ sse-string write " detected" print ]
 886 [ install-sse-check ]
 887 [ enable-sse ] tri