From: Joe Groff <arcata@gmail.com>
Date: Mon, 28 Sep 2009 03:21:42 +0000 (-0500)
Subject: Merge branch 'master' of git://factorcode.org/git/factor
X-Git-Tag: 0.97~5462^2~1
X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=commitdiff_plain;h=a14855b98a8e87d7edcb5ddf72f4494880525292;hp=866b62755ebdda8d3274da5914c672b08df34cd5

Merge branch 'master' of git://factorcode.org/git/factor
---

diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor
index 6f5a05c672..5b494a39d9 100644
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@ -657,7 +657,8 @@ literal: label
 def: dst/int-rep
 use: src1/int-rep src2/int-rep ;
 
-TUPLE: spill-slot n ; C: <spill-slot> spill-slot
+TUPLE: spill-slot { n integer } ;
+C: <spill-slot> spill-slot
 
 INSN: _gc
 temp: temp1 temp2
@@ -667,11 +668,11 @@ literal: data-values tagged-values uninitialized-locs ;
 ! virtual registers
 INSN: _spill
 use: src
-literal: rep n ;
+literal: rep dst ;
 
 INSN: _reload
 def: dst
-literal: rep n ;
+literal: rep src ;
 
 INSN: _spill-area-size
 literal: n ;
diff --git a/basis/compiler/cfg/linear-scan/allocation/allocation.factor b/basis/compiler/cfg/linear-scan/allocation/allocation.factor
index c23867ffe2..ac32265e65 100644
--- a/basis/compiler/cfg/linear-scan/allocation/allocation.factor
+++ b/basis/compiler/cfg/linear-scan/allocation/allocation.factor
@@ -34,11 +34,15 @@ IN: compiler.cfg.linear-scan.allocation
         [ drop assign-blocked-register ]
     } cond ;
 
+: spill-at-sync-point ( live-interval n -- ? )
+    ! If the live interval has a usage at 'n', don't spill it,
+    ! since this means its being defined by the sync point
+    ! instruction. Output t if this is the case.
+    2dup [ uses>> ] dip swap member? [ 2drop t ] [ spill f ] if ;
+
 : handle-sync-point ( n -- )
     [ active-intervals get values ] dip
-    [ '[ [ _ spill ] each ] each ]
-    [ drop [ delete-all ] each ]
-    2bi ;
+    '[ [ _ spill-at-sync-point ] filter-here ] each ;
 
 :: handle-progress ( n sync? -- )
     n {
diff --git a/basis/compiler/cfg/linear-scan/allocation/state/state.factor b/basis/compiler/cfg/linear-scan/allocation/state/state.factor
index a311f97b66..3ae000891e 100644
--- a/basis/compiler/cfg/linear-scan/allocation/state/state.factor
+++ b/basis/compiler/cfg/linear-scan/allocation/state/state.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors assocs combinators cpu.architecture fry heaps
 kernel math math.order namespaces sequences vectors
-compiler.cfg compiler.cfg.registers
+compiler.cfg compiler.cfg.registers compiler.cfg.instructions
 compiler.cfg.linear-scan.live-intervals ;
 IN: compiler.cfg.linear-scan.allocation.state
 
@@ -118,7 +118,8 @@ SYMBOL: unhandled-intervals
 
 : next-spill-slot ( rep -- n )
     rep-size cfg get
-    [ swap [ align dup ] [ + ] bi ] change-spill-area-size drop ;
+    [ swap [ align dup ] [ + ] bi ] change-spill-area-size drop
+    <spill-slot> ;
 
 ! Minheap of sync points which still need to be processed
 SYMBOL: unhandled-sync-points
@@ -126,7 +127,7 @@ SYMBOL: unhandled-sync-points
 ! Mapping from vregs to spill slots
 SYMBOL: spill-slots
 
-: vreg-spill-slot ( vreg -- n )
+: vreg-spill-slot ( vreg -- spill-slot )
     spill-slots get [ rep-of next-spill-slot ] cache ;
 
 : init-allocator ( registers -- )
diff --git a/basis/compiler/cfg/linear-scan/assignment/assignment.factor b/basis/compiler/cfg/linear-scan/assignment/assignment.factor
index 572107be6c..8959add822 100644
--- a/basis/compiler/cfg/linear-scan/assignment/assignment.factor
+++ b/basis/compiler/cfg/linear-scan/assignment/assignment.factor
@@ -33,7 +33,7 @@ ERROR: bad-vreg vreg ;
 : (vreg>reg) ( vreg pending -- reg )
     ! If a live vreg is not in the pending set, then it must
     ! have been spilled.
-    ?at [ spill-slots get ?at [ <spill-slot> ] [ bad-vreg ] if ] unless ;
+    ?at [ spill-slots get ?at [ ] [ bad-vreg ] if ] unless ;
 
 : vreg>reg ( vreg -- reg )
     pending-interval-assoc get (vreg>reg) ;
diff --git a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor
index f09fe403e6..77c9e348c9 100644
--- a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor
+++ b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor
@@ -92,7 +92,7 @@ H{
        { end 2 }
        { uses V{ 0 1 } }
        { ranges V{ T{ live-range f 0 2 } } }
-       { spill-to 0 }
+       { spill-to T{ spill-slot f 0 } }
     }
     T{ live-interval
        { vreg 1 }
@@ -100,7 +100,7 @@ H{
        { end 5 }
        { uses V{ 5 } }
        { ranges V{ T{ live-range f 5 5 } } }
-       { reload-from 0 }
+       { reload-from T{ spill-slot f 0 } }
     }
 ] [
     T{ live-interval
@@ -119,7 +119,7 @@ H{
        { end 1 }
        { uses V{ 0 } }
        { ranges V{ T{ live-range f 0 1 } } }
-       { spill-to 4 }
+       { spill-to T{ spill-slot f 4 } }
     }
     T{ live-interval
        { vreg 2 }
@@ -127,7 +127,7 @@ H{
        { end 5 }
        { uses V{ 1 5 } }
        { ranges V{ T{ live-range f 1 5 } } }
-       { reload-from 4 }
+       { reload-from T{ spill-slot f 4 } }
     }
 ] [
     T{ live-interval
@@ -146,7 +146,7 @@ H{
        { end 1 }
        { uses V{ 0 } }
        { ranges V{ T{ live-range f 0 1 } } }
-       { spill-to 8 }
+       { spill-to T{ spill-slot f 8 } }
     }
     T{ live-interval
        { vreg 3 }
@@ -154,7 +154,7 @@ H{
        { end 30 }
        { uses V{ 20 30 } }
        { ranges V{ T{ live-range f 20 30 } } }
-       { reload-from 8 }
+       { reload-from T{ spill-slot f 8 } }
     }
 ] [
     T{ live-interval
@@ -1042,8 +1042,8 @@ V{
 
 [ _spill ] [ 1 get instructions>> second class ] unit-test
 [ _reload ] [ 4 get instructions>> 4 swap nth class ] unit-test
-[ V{ 3 2 1 } ] [ 8 get instructions>> [ _spill? ] filter [ n>> cell / ] map ] unit-test
-[ V{ 3 2 1 } ] [ 9 get instructions>> [ _reload? ] filter [ n>> cell / ] map ] unit-test
+[ V{ 3 2 1 } ] [ 8 get instructions>> [ _spill? ] filter [ dst>> n>> cell / ] map ] unit-test
+[ V{ 3 2 1 } ] [ 9 get instructions>> [ _reload? ] filter [ src>> n>> cell / ] map ] unit-test
 
 ! Resolve pass should insert this
 [ _reload ] [ 5 get predecessors>> first instructions>> first class ] unit-test
diff --git a/basis/compiler/cfg/linear-scan/resolve/resolve-tests.factor b/basis/compiler/cfg/linear-scan/resolve/resolve-tests.factor
index 47c1f0ae76..e7f291d613 100644
--- a/basis/compiler/cfg/linear-scan/resolve/resolve-tests.factor
+++ b/basis/compiler/cfg/linear-scan/resolve/resolve-tests.factor
@@ -17,7 +17,7 @@ IN: compiler.cfg.linear-scan.resolve.tests
 
 [
     {
-        T{ _reload { dst 1 } { rep int-rep } { n 0 } }
+        T{ _reload { dst 1 } { rep int-rep } { src T{ spill-slot f 0 } } }
     }
 ] [
     [
@@ -27,7 +27,7 @@ IN: compiler.cfg.linear-scan.resolve.tests
 
 [
     {
-        T{ _spill { src 1 } { rep int-rep } { n 0 } }
+        T{ _spill { src 1 } { rep int-rep } { dst T{ spill-slot f 0 } } }
     }
 ] [
     [
@@ -54,14 +54,14 @@ H{ } clone spill-temps set
     { { { 0 int-rep } { 1 int-rep } } { { 1 int-rep } { 0 int-rep } } }
     mapping-instructions {
         {
-            T{ _spill { src 0 } { rep int-rep } { n 8 } }
+            T{ _spill { src 0 } { rep int-rep } { dst T{ spill-slot f 8 } } }
             T{ ##copy { dst 0 } { src 1 } { rep int-rep } }
-            T{ _reload { dst 1 } { rep int-rep } { n 8 } }
+            T{ _reload { dst 1 } { rep int-rep } { src T{ spill-slot f 8 } } }
         }
         {
-            T{ _spill { src 1 } { rep int-rep } { n 8 } }
+            T{ _spill { src 1 } { rep int-rep } { dst T{ spill-slot f 8 } } }
             T{ ##copy { dst 1 } { src 0 } { rep int-rep } }
-            T{ _reload { dst 0 } { rep int-rep } { n 8 } }
+            T{ _reload { dst 0 } { rep int-rep } { src T{ spill-slot f 8 } } }
         }
     } member?
 ] unit-test
diff --git a/basis/compiler/cfg/linear-scan/resolve/resolve.factor b/basis/compiler/cfg/linear-scan/resolve/resolve.factor
index 15dff23448..20c9ee4e99 100644
--- a/basis/compiler/cfg/linear-scan/resolve/resolve.factor
+++ b/basis/compiler/cfg/linear-scan/resolve/resolve.factor
@@ -34,10 +34,10 @@ SYMBOL: spill-temps
     ] if ;
 
 : memory->register ( from to -- )
-    swap [ first2 ] [ first n>> ] bi* _reload ;
+    swap [ first2 ] [ first ] bi* _reload ;
 
 : register->memory ( from to -- )
-    [ first2 ] [ first n>> ] bi* _spill ;
+    [ first2 ] [ first ] bi* _spill ;
 
 : temp->register ( from to -- )
     nip [ first ] [ second ] [ second spill-temp ] tri _reload ;
diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor
index 0c9d7ab45a..76c47d2ef2 100755
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@@ -240,7 +240,7 @@ CODEGEN: _reload %reload
 GENERIC# save-gc-root 1 ( gc-root operand temp -- )
 
 M:: spill-slot save-gc-root ( gc-root operand temp -- )
-    temp int-rep operand n>> %reload
+    temp int-rep operand %reload
     gc-root temp %save-gc-root ;
 
 M: object save-gc-root drop %save-gc-root ;
@@ -253,7 +253,7 @@ GENERIC# load-gc-root 1 ( gc-root operand temp -- )
 
 M:: spill-slot load-gc-root ( gc-root operand temp -- )
     gc-root temp %load-gc-root
-    temp int-rep operand n>> %spill ;
+    temp int-rep operand %spill ;
 
 M: object load-gc-root drop %load-gc-root ;
 
diff --git a/basis/compiler/tests/codegen.factor b/basis/compiler/tests/codegen.factor
index 3dbde076a6..a4f19966b1 100644
--- a/basis/compiler/tests/codegen.factor
+++ b/basis/compiler/tests/codegen.factor
@@ -1,9 +1,10 @@
-USING: generalizations accessors arrays compiler kernel kernel.private
-math hashtables.private math.private namespaces sequences tools.test
-namespaces.private slots.private sequences.private byte-arrays alien
-alien.accessors layouts words definitions compiler.units io
-combinators vectors grouping make alien.c-types combinators.short-circuit
-math.order math.libm math.parser alien.c-types ;
+USING: generalizations accessors arrays compiler kernel
+kernel.private math hashtables.private math.private namespaces
+sequences tools.test namespaces.private slots.private
+sequences.private byte-arrays alien alien.accessors layouts
+words definitions compiler.units io combinators vectors grouping
+make alien.c-types combinators.short-circuit math.order
+math.libm math.parser math.functions ;
 FROM: math => float ;
 QUALIFIED: namespaces.private
 IN: compiler.tests.codegen
@@ -432,6 +433,7 @@ cell 4 = [
     ] compile-call
 ] unit-test
 
+! Bug in CSSA construction
 TUPLE: myseq { underlying1 byte-array read-only } { underlying2 byte-array read-only } ;
 
 [ 2 ] [
@@ -449,3 +451,13 @@ TUPLE: myseq { underlying1 byte-array read-only } { underlying2 byte-array read-
         ] 2curry each-integer
     ] compile-call
 ] unit-test
+
+! Bug in linear scan's partial sync point logic
+[ t ] [
+    [ 1.0 100 [ fsin ] times 1.0 float+ ] compile-call
+    1.168852488727981 1.e-9 ~
+] unit-test
+
+[ 65537.0 ] [
+    [ 2.0 4 [ 2.0 fpow ] times 1.0 float+ ] compile-call
+] unit-test
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index c27aacb875..114e63209a 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -311,8 +311,8 @@ HOOK: %compare-imm-branch cpu ( label cc src1 src2 -- )
 HOOK: %compare-float-ordered-branch cpu ( label cc src1 src2 -- )
 HOOK: %compare-float-unordered-branch cpu ( label cc src1 src2 -- )
 
-HOOK: %spill cpu ( src rep n -- )
-HOOK: %reload cpu ( dst rep n -- )
+HOOK: %spill cpu ( src rep dst -- )
+HOOK: %reload cpu ( dst rep src -- )
 
 HOOK: %loop-entry cpu ( -- )
 
diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor
index 64df207975..90e38a802b 100644
--- a/basis/cpu/ppc/ppc.factor
+++ b/basis/cpu/ppc/ppc.factor
@@ -632,11 +632,11 @@ M:: ppc %compare-float-unordered-branch ( label src1 src2 cc -- )
         { stack-params [ [ [ 0 1 ] dip next-param@ LWZ 0 1 ] dip STW ] }
     } case ;
 
-M: ppc %spill ( src rep n -- )
-    swap [ spill@ ] dip store-to-frame ;
+M: ppc %spill ( src rep dst -- )
+    swap [ n>> spill@ ] dip store-to-frame ;
 
-M: ppc %reload ( dst rep n -- )
-    swap [ spill@ ] dip load-from-frame ;
+M: ppc %reload ( dst rep src -- )
+    swap [ n>> spill@ ] dip load-from-frame ;
 
 M: ppc %loop-entry ;
 
diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor
index 5f6c0d4696..809e068430 100755
--- a/basis/cpu/x86/32/32.factor
+++ b/basis/cpu/x86/32/32.factor
@@ -282,6 +282,34 @@ M: x86.32 %callback-value ( ctype -- )
     ! Unbox EAX
     unbox-return ;
 
+GENERIC: float-function-param ( stack-slot dst src -- )
+
+M:: spill-slot float-function-param ( stack-slot dst src -- )
+    ! We can clobber dst here since its going to contain the
+    ! final result
+    dst src double-rep %copy
+    stack-slot dst double-rep %copy ;
+
+M: register float-function-param
+    nip double-rep %copy ;
+
+: float-function-return ( reg -- )
+    ESP [] FSTPL
+    ESP [] MOVSD
+    ESP 16 ADD ;
+
+M:: x86.32 %unary-float-function ( dst src func -- )
+    ESP -16 [+] dst src float-function-param
+    ESP 16 SUB
+    func f %alien-invoke
+    dst float-function-return ;
+
+M:: x86.32 %binary-float-function ( dst src1 src2 func -- )
+    ESP -16 [+] dst src1 float-function-param
+    ESP  -8 [+] dst src2 float-function-param
+    ESP 16 SUB
+    func f %alien-invoke
+    dst float-function-return ;
 
 M: x86.32 %cleanup ( params -- )
     #! a) If we just called an stdcall function in Windows, it
diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor
index 562563039e..805dda982b 100644
--- a/basis/cpu/x86/64/64.factor
+++ b/basis/cpu/x86/64/64.factor
@@ -218,8 +218,8 @@ M: x86.64 %callback-value ( ctype -- )
     ! Unbox former top of data stack to return registers
     unbox-return ;
 
-: float-function-param ( i spill-slot -- )
-    [ float-regs param-regs nth ] [ n>> spill@ ] bi* MOVSD ;
+: float-function-param ( i src -- )
+    [ float-regs param-regs nth ] dip double-rep %copy ;
 
 : float-function-return ( reg -- )
     float-regs return-reg double-rep %copy ;
@@ -230,6 +230,8 @@ M:: x86.64 %unary-float-function ( dst src func -- )
     dst float-function-return ;
 
 M:: x86.64 %binary-float-function ( dst src1 src2 func -- )
+    ! src1 might equal dst; otherwise it will be a spill slot
+    ! src2 is always a spill slot
     0 src1 float-function-param
     1 src2 float-function-param
     func f %alien-invoke
@@ -249,9 +251,6 @@ M:: x86.64 %call-gc ( gc-root-count temp -- )
 ! x86-64.
 enable-alien-4-intrinsics
 
-! Enable fast calling of libc math functions
-enable-float-functions
-
 USE: vocabs.loader
 
 {
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index d89e360d09..51b5cef23a 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -135,7 +135,10 @@ M: double-2-rep copy-register* drop MOVUPD ;
 M: vector-rep copy-register* drop MOVDQU ;
 
 M: x86 %copy ( dst src rep -- )
-    2over eq? [ 3drop ] [ copy-register* ] if ;
+    2over eq? [ 3drop ] [
+        [ [ dup spill-slot? [ n>> spill@ ] when ] bi@ ] dip
+        copy-register*
+    ] if ;
 
 :: overflow-template ( label dst src1 src2 insn -- )
     src1 src2 insn call
@@ -937,11 +940,8 @@ M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
 M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
     \ UCOMISD (%compare-float-branch) ;
 
-M:: x86 %spill ( src rep n -- )
-    n spill@ src rep %copy ;
-
-M:: x86 %reload ( dst rep n -- )
-    dst n spill@ rep %copy ;
+M:: x86 %spill ( src rep dst -- ) dst src rep %copy ;
+M:: x86 %reload ( dst rep src -- ) dst src rep %copy ;
 
 M: x86 %loop-entry 16 code-alignment [ NOP ] times ;
 
@@ -989,6 +989,7 @@ enable-fixnum-log2
         enable-float-intrinsics
         enable-fsqrt
         enable-float-min/max
+        enable-float-functions
         install-sse2-check
     ] when ;
 
diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor
index c76ed573d5..6ed74caa1f 100644
--- a/basis/math/vectors/simd/functor/functor.factor
+++ b/basis/math/vectors/simd/functor/functor.factor
@@ -111,6 +111,7 @@ N            [ 16 T heap-size /i ]
 A            DEFINES-CLASS ${T}-${N}
 A-boa        DEFINES ${A}-boa
 A-with       DEFINES ${A}-with
+A-cast       DEFINES ${A}-cast
 >A           DEFINES >${A}
 A{           DEFINES ${A}{
 
@@ -170,6 +171,9 @@ SYNTAX: A{ \ } [ >A ] parse-literal ;
     \ A-boa \ A-rep \ A define-boa-custom-inlining
 ] when
 
+: A-cast ( simd-array -- simd-array' )
+    underlying>> \ A boa ; inline
+
 INSTANCE: A sequence
 
 <PRIVATE
@@ -228,6 +232,7 @@ A/2-with     IS ${A/2}-with
 A            DEFINES-CLASS ${T}-${N}
 A-boa        DEFINES ${A}-boa
 A-with       DEFINES ${A}-with
+A-cast       DEFINES ${A}-cast
 >A           DEFINES >${A}
 A{           DEFINES ${A}{
 
@@ -295,6 +300,9 @@ M: A pprint* pprint-object ;
 
 \ A-rep 2 boa-effect \ A-boa set-stack-effect
 
+: A-cast ( simd-array -- simd-array' )
+    [ underlying1>> ] [ underlying2>> ] bi \ A boa ; inline
+
 INSTANCE: A sequence
 
 : A-vv->v-op ( v1 v2 quot -- v3 )
diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor
index 2fdb9ff88c..6dc0f87dd4 100644
--- a/basis/math/vectors/simd/simd-docs.factor
+++ b/basis/math/vectors/simd/simd-docs.factor
@@ -68,6 +68,7 @@ ARTICLE: "math.vectors.simd.words" "SIMD vector words"
     { "Word" "Stack effect" "Description" }
     { { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
     { { $snippet "type-boa" } { $snippet "( ... -- simd-array )" } "creates a new instance where components are read from the stack" }
+    { { $snipept "type-cast" } { $snippet "( simd-array -- simd-array' )" } "creates a new SIMD array where the underlying data is taken from another SIMD array, with no format conversion" }
     { { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
     { { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
 }