1 ! Copyright (C) 2009 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors alien.c-types byte-arrays cpu.architecture
4 kernel math math.functions math.vectors
5 math.vectors.simd.functor math.vectors.simd.intrinsics
6 math.vectors.specialization parser prettyprint.custom sequences
7 sequences.private locals assocs words fry ;
17 "double" define-simd-128
18 "float" define-simd-128
19 "double" define-simd-256
20 "float" define-simd-256
24 : float-4-with ( x -- simd-array )
25 [ 4 ] dip >float '[ _ ] \ float-4 new replicate-as ;
27 : float-4-boa ( a b c d -- simd-array )
28 \ float-4 new 4sequence ;
30 : double-2-with ( x -- simd-array )
31 [ 2 ] dip >float '[ _ ] \ double-2 new replicate-as ;
33 : double-2-boa ( a b -- simd-array )
34 \ double-2 new 2sequence ;
36 ! More efficient expansions for the above, used when SIMD is
43 \ (simd-broadcast) "intrinsic" word-prop [
44 [ >float float-4-rep (simd-broadcast) \ float-4 boa ]
45 ] [ \ float-4-with def>> ] if
46 ] "custom-inlining" set-word-prop
50 \ (simd-gather-4) "intrinsic" word-prop [
52 a >float b >float c >float d >float
53 float-4-rep (simd-gather-4) \ float-4 boa
55 ] [ \ float-4-boa def>> ] if
56 ] "custom-inlining" set-word-prop
60 \ (simd-broadcast) "intrinsic" word-prop [
61 [ >float double-2-rep (simd-broadcast) \ double-2 boa ]
62 ] [ \ double-2-with def>> ] if
63 ] "custom-inlining" set-word-prop
67 \ (simd-gather-4) "intrinsic" word-prop [
68 [ [ >float ] bi@ double-2-rep (simd-gather-2) \ double-2 boa ]
69 ] [ \ double-2-boa def>> ] if
70 ] "custom-inlining" set-word-prop
74 : float-8-with ( x -- simd-array )
75 [ float-4-with ] [ float-4-with ] bi [ underlying>> ] bi@
76 \ float-8 boa ; inline
78 :: float-8-boa ( a b c d e f g h -- simd-array )
82 \ float-8 boa ; inline
84 : double-4-with ( x -- simd-array )
85 [ double-2-with ] [ double-2-with ] bi [ underlying>> ] bi@
86 \ double-4 boa ; inline
88 :: double-4-boa ( a b c d -- simd-array )
92 \ double-4 boa ; inline
98 ! Filter out operations that are not available, eg horizontal adds
99 ! on SSE2. Fallback code in math.vectors is used in that case.
101 : supported-simd-ops ( assoc -- assoc' )
110 } [ nip "intrinsic" word-prop ] assoc-filter
111 '[ drop _ key? ] assoc-filter ;
113 ! Some SIMD operations are defined in terms of others.
115 :: high-level-ops ( ctor -- assoc )
117 { vneg [ [ dup v- ] keep v- ] }
119 { n+v [ [ ctor execute ] dip v+ ] }
120 { v+n [ ctor execute v+ ] }
121 { n-v [ [ ctor execute ] dip v- ] }
122 { v-n [ ctor execute v- ] }
123 { n*v [ [ ctor execute ] dip v* ] }
124 { v*n [ ctor execute v* ] }
125 { n/v [ [ ctor execute ] dip v/ ] }
126 { v/n [ ctor execute v/ ] }
127 { norm-sq [ dup v. assert-positive ] }
128 { norm [ norm-sq sqrt ] }
129 { normalize [ dup norm v/n ] }
130 { distance [ v- norm ] }
133 :: simd-vector-words ( class ctor elt-type assoc -- )
134 class elt-type assoc supported-simd-ops ctor high-level-ops assoc-union
135 specialize-vector-words ;
139 \ float-4 \ float-4-with float H{
140 { v+ [ [ (simd-v+) ] float-4-vv->v-op ] }
141 { v- [ [ (simd-v-) ] float-4-vv->v-op ] }
142 { v* [ [ (simd-v*) ] float-4-vv->v-op ] }
143 { v/ [ [ (simd-v/) ] float-4-vv->v-op ] }
144 { vmin [ [ (simd-vmin) ] float-4-vv->v-op ] }
145 { vmax [ [ (simd-vmax) ] float-4-vv->v-op ] }
146 { sum [ [ (simd-sum) ] float-4-v->n-op ] }
149 \ double-2 \ double-2-with float H{
150 { v+ [ [ (simd-v+) ] double-2-vv->v-op ] }
151 { v- [ [ (simd-v-) ] double-2-vv->v-op ] }
152 { v* [ [ (simd-v*) ] double-2-vv->v-op ] }
153 { v/ [ [ (simd-v/) ] double-2-vv->v-op ] }
154 { vmin [ [ (simd-vmin) ] double-2-vv->v-op ] }
155 { vmax [ [ (simd-vmax) ] double-2-vv->v-op ] }
156 { sum [ [ (simd-sum) ] double-2-v->n-op ] }
159 \ float-8 \ float-8-with float H{
160 { v+ [ [ (simd-v+) ] float-8-vv->v-op ] }
161 { v- [ [ (simd-v-) ] float-8-vv->v-op ] }
162 { v* [ [ (simd-v*) ] float-8-vv->v-op ] }
163 { v/ [ [ (simd-v/) ] float-8-vv->v-op ] }
164 { vmin [ [ (simd-vmin) ] float-8-vv->v-op ] }
165 { vmax [ [ (simd-vmax) ] float-8-vv->v-op ] }
166 { sum [ [ (simd-sum) ] [ + ] float-8-v->n-op ] }
169 \ double-4 \ double-4-with float H{
170 { v+ [ [ (simd-v+) ] double-4-vv->v-op ] }
171 { v- [ [ (simd-v-) ] double-4-vv->v-op ] }
172 { v* [ [ (simd-v*) ] double-4-vv->v-op ] }
173 { v/ [ [ (simd-v/) ] double-4-vv->v-op ] }
174 { vmin [ [ (simd-vmin) ] double-4-vv->v-op ] }
175 { vmax [ [ (simd-vmax) ] double-4-vv->v-op ] }
176 { sum [ [ (simd-v+) ] [ (simd-sum) ] double-4-v->n-op ] }
183 "math.vectors.simd.alien" require