From ba7cb611335eb8730374f44876ecd1aeb06655f6 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sat, 22 May 2010 01:25:10 -0400 Subject: [PATCH] Stack allocation improvements - New with-out-parameters combinator - Inhibit tail call optimization in frames with local allocation, to ensure that passing a stack allocated value to the last word in the quotation works - local allocations are now aligned properly - spill slots are now aligned properly aligned in frames which have parameter and local allocation areas --- basis/alien/c-types/c-types-docs.factor | 22 +++--- basis/alien/c-types/c-types.factor | 27 ++++--- basis/alien/data/data.factor | 25 ++++++- basis/alien/parser/parser.factor | 4 +- basis/classes/struct/struct.factor | 9 +-- .../build-stack-frame.factor | 74 ++++++++++--------- basis/compiler/cfg/builder/alien/alien.factor | 7 +- .../cfg/builder/alien/boxing/boxing.factor | 4 +- basis/compiler/cfg/cfg.factor | 2 +- .../cfg/instructions/instructions.factor | 2 +- .../compiler/cfg/intrinsics/intrinsics.factor | 1 + .../compiler/cfg/intrinsics/misc/misc.factor | 9 ++- .../linear-scan/allocation/state/state.factor | 14 +++- .../cfg/linear-scan/linear-scan-tests.factor | 2 +- .../cfg/stack-frame/stack-frame.factor | 24 +++--- basis/compiler/tests/alien.factor | 17 +++++ .../propagation/transforms/transforms.factor | 4 +- basis/cpu/architecture/architecture.factor | 2 +- basis/cpu/x86/x86.factor | 4 +- basis/math/vectors/simd/simd-tests.factor | 62 +++++++++++++--- basis/math/vectors/simd/simd.factor | 4 +- .../specialized-arrays.factor | 8 +- 22 files changed, 202 insertions(+), 125 deletions(-) diff --git a/basis/alien/c-types/c-types-docs.factor b/basis/alien/c-types/c-types-docs.factor index 27a2729194..32c1d18d51 100644 --- a/basis/alien/c-types/c-types-docs.factor +++ b/basis/alien/c-types/c-types-docs.factor @@ -1,13 +1,13 @@ USING: alien alien.complex help.syntax help.markup libc kernel.private byte-arrays strings hashtables alien.syntax alien.strings sequences io.encodings.string debugger destructors vocabs.loader -classes.struct ; +classes.struct math kernel ; QUALIFIED: math QUALIFIED: sequences IN: alien.c-types HELP: heap-size -{ $values { "name" "a C type name" } { "size" math:integer } } +{ $values { "name" c-type-name } { "size" math:integer } } { $description "Outputs the number of bytes needed for a heap-allocated value of this C type." } { $examples { $example "USING: alien alien.c-types prettyprint ;\nint heap-size ." "4" } @@ -19,24 +19,24 @@ HELP: { $description "Creates a prototypical C type. User code should use higher-level facilities to define C types; see " { $link "c-data" } "." } ; HELP: no-c-type -{ $values { "name" "a C type name" } } +{ $values { "name" c-type-name } } { $description "Throws a " { $link no-c-type } " error." } { $error-description "Thrown by " { $link c-type } " if a given string does not name a C type. When thrown during compile time, indicates a typo in an " { $link alien-invoke } " or " { $link alien-callback } " form." } ; HELP: c-type -{ $values { "name" "a C type" } { "c-type" c-type } } +{ $values { "name" c-type-name } { "c-type" c-type } } { $description "Looks up a C type by name." } { $errors "Throws a " { $link no-c-type } " error if the type does not exist, or the word is not a C type." } ; -HELP: c-getter -{ $values { "name" "a C type" } { "quot" { $quotation "( c-ptr n -- obj )" } } } -{ $description "Outputs a quotation which reads values of this C type from a C structure." } +HELP: alien-value +{ $values { "c-ptr" c-ptr } { "offset" integer } { "c-type" c-type-name } { "value" object } } +{ $description "Loads a value at a byte offset from a base C pointer." } { $errors "Throws a " { $link no-c-type } " error if the type does not exist." } ; -HELP: c-setter -{ $values { "name" "a C type" } { "quot" { $quotation "( obj c-ptr n -- )" } } } -{ $description "Outputs a quotation which writes values of this C type to a C structure." } -{ $errors "Throws an error if the type does not exist." } ; +HELP: set-alien-value +{ $values { "value" object } { "c-ptr" c-ptr } { "offset" integer } { "c-type" c-type-name } } +{ $description "Stores a value at a byte offset from a base C pointer." } +{ $errors "Throws a " { $link no-c-type } " error if the type does not exist." } ; HELP: define-deref { $values { "c-type" "a C type" } } diff --git a/basis/alien/c-types/c-types.factor b/basis/alien/c-types/c-types.factor index 7f66417a55..412bf9259a 100644 --- a/basis/alien/c-types/c-types.factor +++ b/basis/alien/c-types/c-types.factor @@ -6,7 +6,7 @@ words splitting cpu.architecture alien alien.accessors alien.strings quotations layouts system compiler.units io io.files io.encodings.binary io.streams.memory accessors combinators effects continuations fry classes vocabs -vocabs.loader words.symbol ; +vocabs.loader words.symbol macros ; QUALIFIED: math IN: alien.c-types @@ -93,7 +93,7 @@ GENERIC: c-type-setter ( name -- quot ) M: c-type c-type-setter setter>> ; -GENERIC: c-type-align ( name -- n ) +GENERIC: c-type-align ( name -- n ) foldable M: abstract-c-type c-type-align align>> ; @@ -115,18 +115,22 @@ M: abstract-c-type heap-size size>> ; MIXIN: value-type -: c-getter ( name -- quot ) +MACRO: alien-value ( c-type -- quot: ( c-ptr offset -- value ) ) [ c-type-getter ] [ c-type-boxer-quot ] bi append ; -: c-setter ( name -- quot ) +MACRO: set-alien-value ( c-type -- quot: ( value c-ptr offset -- ) ) [ c-type-unboxer-quot [ [ ] ] [ '[ _ 2dip ] ] if-empty ] [ c-type-setter ] bi append ; -: array-accessor ( c-type quot -- def ) - [ - \ swap , [ heap-size , [ * >fixnum ] % ] [ % ] bi* - ] [ ] make ; +: array-accessor ( n c-ptr c-type -- c-ptr offset c-type ) + [ swapd heap-size * >fixnum ] keep ; inline + +: alien-element ( n c-ptr c-type -- value ) + array-accessor alien-value ; inline + +: set-alien-element ( value n c-ptr c-type -- ) + array-accessor set-alien-value ; inline PROTOCOL: c-type-protocol c-type-class @@ -159,12 +163,13 @@ TUPLE: long-long-type < c-type ; long-long-type new ; : define-deref ( c-type -- ) - [ name>> CHAR: * prefix "alien.c-types" create ] [ c-getter 0 prefix ] bi - (( c-ptr -- value )) define-inline ; + [ name>> CHAR: * prefix "alien.c-types" create ] + [ '[ 0 _ alien-value ] ] + bi (( c-ptr -- value )) define-inline ; : define-out ( c-type -- ) [ name>> "alien.c-types" constructor-word ] - [ dup c-setter '[ _ heap-size (byte-array) [ 0 @ ] keep ] ] bi + [ dup '[ _ heap-size (byte-array) [ 0 _ set-alien-value ] keep ] ] bi (( value -- c-ptr )) define-inline ; : define-primitive-type ( c-type name -- ) diff --git a/basis/alien/data/data.factor b/basis/alien/data/data.factor index df57e6faa4..81b53a1b39 100644 --- a/basis/alien/data/data.factor +++ b/basis/alien/data/data.factor @@ -2,7 +2,7 @@ USING: accessors alien alien.c-types alien.arrays alien.strings arrays byte-arrays cpu.architecture fry io io.encodings.binary io.files io.streams.memory kernel libc math sequences words -macros ; +macros combinators generalizations ; IN: alien.data GENERIC: require-c-array ( c-type -- ) @@ -80,12 +80,29 @@ ERROR: local-allocation-error ; : with-scoped-allocation ( c-types quot -- ) - [ (local-allots) ] dip call ; inline + [ [ (local-allots) ] [ box-values ] bi ] dip call + (cleanup-allot) ; inline + +: with-out-parameters ( c-types quot finish -- values ) + [ [ drop (local-allots) ] [ swap out-parameters ] 2bi ] dip call + (cleanup-allot) ; inline diff --git a/basis/alien/parser/parser.factor b/basis/alien/parser/parser.factor index dea9627970..332683a0ac 100755 --- a/basis/alien/parser/parser.factor +++ b/basis/alien/parser/parser.factor @@ -168,8 +168,8 @@ PREDICATE: alien-callback-type-word < typedef-word "callback-effect" word-prop ; : global-quot ( type word -- quot ) - name>> current-library get '[ _ _ address-of 0 ] - swap c-getter append ; + swap [ name>> current-library get ] dip + '[ _ _ address-of 0 _ alien-value ] ; : define-global ( type word -- ) [ nip ] [ global-quot ] 2bi (( -- value )) define-declared ; diff --git a/basis/classes/struct/struct.factor b/basis/classes/struct/struct.factor index 5a0f21c750..97dbe16d30 100644 --- a/basis/classes/struct/struct.factor +++ b/basis/classes/struct/struct.factor @@ -101,8 +101,7 @@ MACRO: ( class -- quot: ( ... -- struct ) ) GENERIC: (reader-quot) ( slot -- quot ) M: struct-slot-spec (reader-quot) - [ type>> c-getter ] - [ offset>> [ >c-ptr ] swap suffix ] bi prepend ; + [ offset>> ] [ type>> ] bi '[ >c-ptr _ _ alien-value ] ; M: struct-bit-slot-spec (reader-quot) [ [ offset>> ] [ bits>> ] bi bit-reader ] @@ -113,12 +112,10 @@ M: struct-bit-slot-spec (reader-quot) GENERIC: (writer-quot) ( slot -- quot ) M: struct-slot-spec (writer-quot) - [ type>> c-setter ] - [ offset>> [ >c-ptr ] swap suffix ] bi prepend ; + [ offset>> ] [ type>> ] bi '[ >c-ptr _ _ set-alien-value ] ; M: struct-bit-slot-spec (writer-quot) - [ offset>> ] [ bits>> ] bi bit-writer - [ >c-ptr ] prepose ; + [ offset>> ] [ bits>> ] bi bit-writer [ >c-ptr ] prepose ; : (boxer-quot) ( class -- quot ) '[ _ memory>struct ] ; diff --git a/basis/compiler/cfg/build-stack-frame/build-stack-frame.factor b/basis/compiler/cfg/build-stack-frame/build-stack-frame.factor index 70a02658d3..a973a3721c 100644 --- a/basis/compiler/cfg/build-stack-frame/build-stack-frame.factor +++ b/basis/compiler/cfg/build-stack-frame/build-stack-frame.factor @@ -1,33 +1,33 @@ ! Copyright (C) 2008, 2010 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: namespaces accessors math math.order assocs kernel sequences -combinators classes words system cpu.architecture layouts compiler.cfg -compiler.cfg.rpo compiler.cfg.instructions -compiler.cfg.registers compiler.cfg.stack-frame ; +USING: namespaces accessors math math.order assocs kernel +sequences combinators classes words system fry locals +cpu.architecture layouts compiler.cfg compiler.cfg.rpo +compiler.cfg.instructions compiler.cfg.registers +compiler.cfg.stack-frame ; IN: compiler.cfg.build-stack-frame -SYMBOL: local-allot +SYMBOLS: param-area-size allot-area-size allot-area-align +frame-required? ; -SYMBOL: frame-required? +: frame-required ( -- ) frame-required? on ; GENERIC: compute-stack-frame* ( insn -- ) -: frame-required ( -- ) frame-required? on ; - -: request-stack-frame ( stack-frame -- ) +M:: ##local-allot compute-stack-frame* ( insn -- ) frame-required - stack-frame [ max-stack-frame ] change ; - -M: ##local-allot compute-stack-frame* - local-allot get >>offset - size>> local-allot +@ ; + insn size>> :> s + insn align>> :> a + allot-area-align [ a max ] change + allot-area-size [ a align [ insn offset<< ] [ s + ] bi ] change ; M: ##stack-frame compute-stack-frame* - stack-frame>> request-stack-frame ; + frame-required + stack-frame>> param-area-size [ max ] change ; : vm-frame-required ( -- ) frame-required - stack-frame new vm-stack-space >>params request-stack-frame ; + vm-stack-space param-area-size [ max ] change ; M: ##call-gc compute-stack-frame* drop vm-frame-required ; M: ##box compute-stack-frame* drop vm-frame-required ; @@ -51,25 +51,27 @@ M: ##integer>float compute-stack-frame* M: insn compute-stack-frame* drop ; -: request-spill-area ( n -- ) - stack-frame new swap >>spill-area-size request-stack-frame ; - -: request-local-allot ( n -- ) - stack-frame new swap >>local-allot request-stack-frame ; - -: compute-stack-frame ( cfg -- ) - 0 local-allot set - stack-frame new stack-frame set - [ spill-area-size>> [ request-spill-area ] unless-zero ] - [ [ instructions>> [ compute-stack-frame* ] each ] each-basic-block ] bi - local-allot get [ request-local-allot ] unless-zero - stack-frame get dup stack-frame-size >>total-size drop ; +: finalize-stack-frame ( stack-frame -- ) + dup [ params>> ] [ allot-area-align>> ] bi align >>allot-area-base + dup [ [ allot-area-base>> ] [ allot-area-size>> ] bi + ] [ spill-area-align>> ] bi align >>spill-area-base + dup stack-frame-size >>total-size drop ; + +: ( cfg -- stack-frame ) + [ stack-frame new ] dip + [ spill-area-size>> >>spill-area-size ] + [ spill-area-align>> >>spill-area-align ] bi + allot-area-size get >>allot-area-size + allot-area-align get >>allot-area-align + param-area-size get >>params + dup finalize-stack-frame ; + +: compute-stack-frame ( cfg -- stack-frame/f ) + [ [ instructions>> [ compute-stack-frame* ] each ] each-basic-block ] + [ frame-required? get [ ] [ drop f ] if ] + bi ; : build-stack-frame ( cfg -- cfg ) - [ - [ compute-stack-frame ] - [ - frame-required? get stack-frame get f ? - >>stack-frame - ] bi - ] with-scope ; + 0 param-area-size set + 0 allot-area-size set + cell allot-area-align set + dup compute-stack-frame >>stack-frame ; diff --git a/basis/compiler/cfg/builder/alien/alien.factor b/basis/compiler/cfg/builder/alien/alien.factor index 16da6675c8..7bf45e959a 100644 --- a/basis/compiler/cfg/builder/alien/alien.factor +++ b/basis/compiler/cfg/builder/alien/alien.factor @@ -23,7 +23,7 @@ IN: compiler.cfg.builder.alien : prepare-struct-caller ( vregs reps return -- vregs' reps' return-vreg/f ) dup large-struct? [ - heap-size f ^^local-allot [ + heap-size cell f ^^local-allot [ '[ _ prefix ] [ int-rep struct-return-on-stack? 2array prefix ] bi* ] keep @@ -93,12 +93,9 @@ M: array dlsym-valid? '[ _ dlsym ] any? ; _ [ alien-node-height ] bi ] emit-trivial-block ; inline -: ( stack-size -- stack-frame ) - stack-frame new swap >>params ; - : emit-stack-frame ( stack-size params -- ) [ [ return>> ] [ abi>> ] bi stack-cleanup ##cleanup ] - [ drop ##stack-frame ] + [ drop ##stack-frame ] 2bi ; M: #alien-invoke emit-node diff --git a/basis/compiler/cfg/builder/alien/boxing/boxing.factor b/basis/compiler/cfg/builder/alien/boxing/boxing.factor index 7c43a87fed..6f5f46b9c1 100644 --- a/basis/compiler/cfg/builder/alien/boxing/boxing.factor +++ b/basis/compiler/cfg/builder/alien/boxing/boxing.factor @@ -49,7 +49,7 @@ M: c-type unbox [ ^^unbox 1array ] [ nip f 2array 1array ] 2bi ; M: long-long-type unbox - [ 8 f ^^local-allot ] dip '[ _ unboxer>> ##unbox-long-long ] keep + [ 8 cell f ^^local-allot ] dip '[ _ unboxer>> ##unbox-long-long ] keep 0 cell [ int-rep f ^^load-memory-imm ] bi-curry@ bi 2array int-rep long-long-on-stack? 2array dup 2array ; @@ -67,7 +67,7 @@ M: long-long-type unbox-parameter unbox ; M: struct-c-type unbox-parameter dup value-struct? [ unbox ] [ - [ nip heap-size f ^^local-allot dup ] + [ nip heap-size cell f ^^local-allot dup ] [ [ ^^unbox-any-c-ptr ] dip explode-struct keys ] 2bi implode-struct 1array { { int-rep f } } diff --git a/basis/compiler/cfg/cfg.factor b/basis/compiler/cfg/cfg.factor index 4a343d1651..7fde6c1371 100644 --- a/basis/compiler/cfg/cfg.factor +++ b/basis/compiler/cfg/cfg.factor @@ -22,7 +22,7 @@ number M: basic-block hashcode* nip id>> ; TUPLE: cfg { entry basic-block } word label -spill-area-size +spill-area-size spill-area-align stack-frame frame-pointer? post-order linear-order diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 8edf100f91..174743fdfd 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -660,7 +660,7 @@ literal: n rep ; INSN: ##local-allot def: dst/int-rep -literal: size offset ; +literal: size align offset ; INSN: ##box def: dst/tagged-rep diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 11d063c430..bf8ba96c34 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -66,6 +66,7 @@ IN: compiler.cfg.intrinsics { byte-arrays:(byte-array) [ emit-(byte-array) ] } { kernel: [ emit-simple-allot ] } { alien.data.private:(local-allot) [ emit-local-allot ] } + { alien.data.private:(cleanup-allot) [ drop emit-cleanup-allot ] } { alien: [ emit- ] } { alien.accessors:alien-unsigned-1 [ int-rep alien.c-types:uchar emit-load-memory ] } { alien.accessors:set-alien-unsigned-1 [ int-rep alien.c-types:uchar emit-store-memory ] } diff --git a/basis/compiler/cfg/intrinsics/misc/misc.factor b/basis/compiler/cfg/intrinsics/misc/misc.factor index 03b8fb47f1..62bb15f953 100644 --- a/basis/compiler/cfg/intrinsics/misc/misc.factor +++ b/basis/compiler/cfg/intrinsics/misc/misc.factor @@ -54,7 +54,10 @@ IN: compiler.cfg.intrinsics.misc ] unary-op ; : emit-local-allot ( node -- ) - dup node-input-infos first literal>> dup integer? - [ nip ds-drop f ^^local-allot ^^box-alien ds-push ] - [ drop emit-primitive ] + dup node-input-infos first2 [ literal>> ] bi@ 2dup [ integer? ] both? + [ ds-drop ds-drop f ^^local-allot ^^box-alien ds-push drop ] + [ 2drop emit-primitive ] if ; + +: emit-cleanup-allot ( -- ) + [ ##no-tco ] emit-trivial-block ; diff --git a/basis/compiler/cfg/linear-scan/allocation/state/state.factor b/basis/compiler/cfg/linear-scan/allocation/state/state.factor index 89ec1b7785..e0cc80f15c 100644 --- a/basis/compiler/cfg/linear-scan/allocation/state/state.factor +++ b/basis/compiler/cfg/linear-scan/allocation/state/state.factor @@ -1,7 +1,7 @@ -! Copyright (C) 2009 Slava Pestov. +! Copyright (C) 2009, 2010 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: arrays accessors assocs combinators cpu.architecture fry -heaps kernel math math.order namespaces sequences vectors +heaps kernel math math.order namespaces layouts sequences vectors linked-assocs compiler.cfg compiler.cfg.registers compiler.cfg.instructions compiler.cfg.linear-scan.live-intervals ; @@ -122,6 +122,9 @@ SYMBOL: unhandled-intervals [ swap [ align dup ] [ + ] bi ] change-spill-area-size drop ; +: align-spill-area ( align -- ) + cfg get [ max ] change-spill-area-align drop ; + ! Minheap of sync points which still need to be processed SYMBOL: unhandled-sync-points @@ -129,7 +132,10 @@ SYMBOL: unhandled-sync-points SYMBOL: spill-slots : assign-spill-slot ( coalesced-vreg rep -- spill-slot ) - rep-size spill-slots get [ nip next-spill-slot ] 2cache ; + rep-size + [ align-spill-area ] + [ spill-slots get [ nip next-spill-slot ] 2cache ] + bi ; : lookup-spill-slot ( coalesced-vreg rep -- spill-slot ) rep-size 2array spill-slots get ?at [ ] [ bad-vreg ] if ; @@ -141,7 +147,7 @@ SYMBOL: spill-slots [ V{ } clone ] reg-class-assoc active-intervals set [ V{ } clone ] reg-class-assoc inactive-intervals set V{ } clone handled-intervals set - cfg get 0 >>spill-area-size drop + cfg get 0 >>spill-area-size cell >>spill-area-align drop H{ } clone spill-slots set -1 progress set ; diff --git a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor index c6252c2ea6..873ba6ee5c 100644 --- a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor +++ b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor @@ -76,7 +76,7 @@ check-numbering? on { T{ live-range f 0 5 } } 0 split-ranges ] unit-test -cfg new 0 >>spill-area-size cfg set +cfg new 0 >>spill-area-size 4 >>spill-area-align cfg set H{ } spill-slots set H{ diff --git a/basis/compiler/cfg/stack-frame/stack-frame.factor b/basis/compiler/cfg/stack-frame/stack-frame.factor index 4ed192a21e..790d93a907 100644 --- a/basis/compiler/cfg/stack-frame/stack-frame.factor +++ b/basis/compiler/cfg/stack-frame/stack-frame.factor @@ -7,24 +7,20 @@ IN: compiler.cfg.stack-frame TUPLE: stack-frame { params integer } -{ local-allot integer } +{ allot-area-size integer } +{ allot-area-align integer } { spill-area-size integer } -{ total-size integer } ; +{ spill-area-align integer } + +{ total-size integer } +{ allot-area-base integer } +{ spill-area-base integer } ; -! Stack frame utilities : local-allot-offset ( n -- offset ) - stack-frame get params>> + ; + stack-frame get allot-area-base>> + ; : spill-offset ( n -- offset ) - stack-frame get [ params>> ] [ local-allot>> ] bi + + ; + stack-frame get spill-area-base>> + ; : (stack-frame-size) ( stack-frame -- n ) - [ params>> ] [ local-allot>> ] [ spill-area-size>> ] tri + + ; - -: max-stack-frame ( frame1 frame2 -- frame3 ) - [ stack-frame new ] 2dip - { - [ [ params>> ] bi@ max >>params ] - [ [ local-allot>> ] bi@ max >>local-allot ] - [ [ spill-area-size>> ] bi@ max >>spill-area-size ] - } 2cleave ; + [ spill-area-base>> ] [ spill-area-size>> ] bi + ; diff --git a/basis/compiler/tests/alien.factor b/basis/compiler/tests/alien.factor index 09bcf3e281..7045e64928 100755 --- a/basis/compiler/tests/alien.factor +++ b/basis/compiler/tests/alien.factor @@ -767,3 +767,20 @@ mingw? [ : blah ( -- x ) { RECT } [ 1.5 >>x 2.0 >>y [ x>> ] [ y>> ] bi * >fixnum ] with-scoped-allocation ; [ 3 ] [ blah ] unit-test + +: out-param-test ( -- b ) + { int } [ [ 12 ] dip 0 int set-alien-value ] [ ] with-out-parameters ; + +[ 12 ] [ out-param-test ] unit-test + +: out-param-callback ( -- a ) + void { int pointer: int } cdecl + [ [ 2 * ] dip 0 int set-alien-value ] alien-callback ; + +: out-param-indirect ( a a -- b ) + { int } [ + swap void { int pointer: int } cdecl + alien-indirect + ] [ ] with-out-parameters ; + +[ 12 ] [ 6 out-param-callback out-param-indirect ] unit-test diff --git a/basis/compiler/tree/propagation/transforms/transforms.factor b/basis/compiler/tree/propagation/transforms/transforms.factor index a89238dc5c..28de7abd4b 100644 --- a/basis/compiler/tree/propagation/transforms/transforms.factor +++ b/basis/compiler/tree/propagation/transforms/transforms.factor @@ -310,9 +310,7 @@ M\ set intersect [ intersect-quot ] 1 define-partial-eval ! We want to constant-fold calls to heap-size, and recompile those ! calls when a C type is redefined \ heap-size [ - dup word? [ - [ depends-on-definition ] [ heap-size '[ _ ] ] bi - ] [ drop f ] if + [ depends-on-c-type ] [ heap-size '[ _ ] ] bi ] 1 define-partial-eval ! Eliminates a few redundant checks here and there diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index c9934509fa..53f86d8e5c 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -586,7 +586,7 @@ HOOK: %store-reg-param cpu ( src reg rep -- ) HOOK: %store-stack-param cpu ( src n rep -- ) -HOOK: %local-allot cpu ( dst size offset -- ) +HOOK: %local-allot cpu ( dst size align offset -- ) ! Call a function to convert a value into a tagged pointer, ! possibly allocating a bignum, float, or alien instance, diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 83711b7b5d..38c51591e9 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -588,8 +588,8 @@ M:: x86 %store-stack-param ( src n rep -- ) M:: x86 %load-stack-param ( dst n rep -- ) dst n next-stack@ rep %copy ; -M: x86 %local-allot ( dst size offset -- ) - nip local-allot-offset special-offset stack@ LEA ; +M:: x86 %local-allot ( dst size align offset -- ) + dst offset local-allot-offset special-offset stack@ LEA ; M: x86 %alien-indirect ( src -- ) ?spill-slot CALL ; diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 2a8298b989..9bc90cbf7e 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -6,7 +6,7 @@ tools.test vocabs assocs compiler.cfg.debugger words locals combinators cpu.architecture namespaces byte-arrays alien specialized-arrays classes.struct eval classes.algebra sets quotations math.constants compiler.units splitting math.matrices -math.vectors.simd.cords ; +math.vectors.simd.cords alien.data ; FROM: math.vectors.simd.intrinsics => alien-vector set-alien-vector ; QUALIFIED-WITH: alien.c-types c SPECIALIZED-ARRAY: c:float @@ -610,6 +610,17 @@ STRUCT: simd-struct [ ] [ char-16 new 1array stack. ] unit-test +! Test some sequence protocol stuff +[ t ] [ 4 double-4{ 1 2 3 4 } new-sequence double-4? ] unit-test +[ double-4{ 2 3 4 5 } ] [ double-4{ 1 2 3 4 } [ 1 + ] map ] unit-test + +! Test cross product +[ float-4{ 0.0 0.0 1.0 0.0 } ] [ float-4{ 1.0 0.0 0.0 0.0 } float-4{ 0.0 1.0 0.0 0.0 } cross ] unit-test +[ float-4{ 0.0 -1.0 0.0 0.0 } ] [ float-4{ 1.0 0.0 0.0 0.0 } float-4{ 0.0 0.0 1.0 0.0 } cross ] unit-test + +[ double-4{ 0.0 0.0 1.0 0.0 } ] [ double-4{ 1.0 0.0 0.0 0.0 } double-4{ 0.0 1.0 0.0 0.0 } cross ] unit-test +[ double-4{ 0.0 -1.0 0.0 0.0 } ] [ double-4{ 1.0 0.0 0.0 0.0 } double-4{ 0.0 0.0 1.0 0.0 } cross ] unit-test + ! CSSA bug [ 4000000 ] [ int-4{ 1000 1000 1000 1000 } @@ -650,13 +661,46 @@ STRUCT: simd-struct [ float-4{ 0 0 0 0 } ] [ 5.0 float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-2 ] unit-test -! Test some sequence protocol stuff -[ t ] [ 4 double-4{ 1 2 3 4 } new-sequence double-4? ] unit-test -[ double-4{ 2 3 4 5 } ] [ double-4{ 1 2 3 4 } [ 1 + ] map ] unit-test +USE: alien -! Test cross product -[ float-4{ 0.0 0.0 1.0 0.0 } ] [ float-4{ 1.0 0.0 0.0 0.0 } float-4{ 0.0 1.0 0.0 0.0 } cross ] unit-test -[ float-4{ 0.0 -1.0 0.0 0.0 } ] [ float-4{ 1.0 0.0 0.0 0.0 } float-4{ 0.0 0.0 1.0 0.0 } cross ] unit-test +: callback-1 ( -- c ) + c:int { c:int c:int c:int c:int c:int } cdecl [ + + + + ] alien-callback ; -[ double-4{ 0.0 0.0 1.0 0.0 } ] [ double-4{ 1.0 0.0 0.0 0.0 } double-4{ 0.0 1.0 0.0 0.0 } cross ] unit-test -[ double-4{ 0.0 -1.0 0.0 0.0 } ] [ double-4{ 1.0 0.0 0.0 0.0 } double-4{ 0.0 0.0 1.0 0.0 } cross ] unit-test +: indirect-1 ( x x x x x c -- y ) + c:int { c:int c:int c:int c:int c:int } cdecl alien-indirect ; inline + +: simd-spill-test-3 ( a b d c -- v ) + { float float-4 float-4 float } declare + [ [ 3.0 + ] 2dip v+ ] dip sin v*n n*v + 10 5 100 50 500 callback-1 indirect-1 665 assert= ; + +[ float-4{ 0 0 0 0 } ] +[ 5.0 float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-3 ] unit-test + +! Stack allocation of SIMD values -- make sure that everything is +! aligned right + +: simd-stack-test ( -- b c ) + { c:int float-4 } [ + [ 123 swap 0 c:int c:set-alien-value ] + [ float-4{ 1 2 3 4 } swap 0 float-4 c:set-alien-value ] bi* + ] [ ] with-out-parameters ; + +[ 123 float-4{ 1 2 3 4 } ] [ simd-stack-test ] unit-test + +! Stack allocation + spilling + +: (simd-stack-spill-test) ( -- n ) 17 ; + +: simd-stack-spill-test ( x -- b c ) + { c:int } [ + 123 swap 0 c:int c:set-alien-value + >float (simd-stack-spill-test) float-4-with swap cos v*n + ] [ ] with-out-parameters ; + +[ ] [ + 1.047197551196598 simd-stack-spill-test + [ float-4{ 8.5 8.5 8.5 8.5 } approx= t assert= ] + [ 123 assert= ] + bi* +] unit-test diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 708fcaa190..1c2f61c7c6 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -254,8 +254,6 @@ ELT [ A-rep rep-component-type ] N [ A-rep rep-length ] COERCER [ ELT c:c-type-class "coercer" word-prop [ ] or ] -SET-NTH [ ELT dup c:c-setter c:array-accessor ] - BOA-EFFECT [ N "n" { "v" } ] WHERE @@ -271,7 +269,7 @@ M: A nth-unsafe swap \ A-rep [ (simd-select) ] [ call-next-method ] vn->n-op ; inline M: A set-nth-unsafe [ ELT boolean>element ] 2dip - underlying>> SET-NTH call ; inline + underlying>> ELT c:set-alien-element ; inline : >A ( seq -- simd ) \ A new clone-like ; inline diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor index 0e0b03c870..dc070f99b4 100644 --- a/basis/specialized-arrays/specialized-arrays.factor +++ b/basis/specialized-arrays/specialized-arrays.factor @@ -41,12 +41,8 @@ A DEFINES-CLASS ${T}-array malloc-A DEFINES malloc-${A} >A DEFINES >${A} A-cast DEFINES ${A}-cast - A{ DEFINES ${A}{ A@ DEFINES ${A}@ - -NTH [ T dup c-getter array-accessor ] -SET-NTH [ T dup c-setter array-accessor ] WHERE @@ -73,9 +69,9 @@ M: A clone [ underlying>> clone ] [ length>> ] bi ; inline M: A length length>> ; inline -M: A nth-unsafe underlying>> NTH call ; inline +M: A nth-unsafe underlying>> \ T alien-element ; inline -M: A set-nth-unsafe underlying>> SET-NTH call ; inline +M: A set-nth-unsafe underlying>> \ T set-alien-element ; inline : >A ( seq -- specialized-array ) A new clone-like ; -- 2.34.1