1 ! Copyright (C) 2009 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors alien.c-types byte-arrays cpu.architecture
4 kernel math math.functions math.vectors
5 math.vectors.simd.functor math.vectors.simd.intrinsics
6 math.vectors.specialization parser prettyprint.custom sequences
7 sequences.private locals assocs words fry ;
8 FROM: alien.c-types => float ;
19 "double" define-simd-128
20 "float" define-simd-128
21 "double" define-simd-256
22 "float" define-simd-256
26 : float-4-with ( x -- simd-array )
27 [ 4 ] dip >float '[ _ ] \ float-4 new replicate-as ;
29 : float-4-boa ( a b c d -- simd-array )
30 \ float-4 new 4sequence ;
32 : double-2-with ( x -- simd-array )
33 [ 2 ] dip >float '[ _ ] \ double-2 new replicate-as ;
35 : double-2-boa ( a b -- simd-array )
36 \ double-2 new 2sequence ;
38 ! More efficient expansions for the above, used when SIMD is
45 \ (simd-broadcast) "intrinsic" word-prop [
46 [ >float float-4-rep (simd-broadcast) \ float-4 boa ]
47 ] [ \ float-4-with def>> ] if
48 ] "custom-inlining" set-word-prop
52 \ (simd-gather-4) "intrinsic" word-prop [
54 a >float b >float c >float d >float
55 float-4-rep (simd-gather-4) \ float-4 boa
57 ] [ \ float-4-boa def>> ] if
58 ] "custom-inlining" set-word-prop
62 \ (simd-broadcast) "intrinsic" word-prop [
63 [ >float double-2-rep (simd-broadcast) \ double-2 boa ]
64 ] [ \ double-2-with def>> ] if
65 ] "custom-inlining" set-word-prop
69 \ (simd-gather-4) "intrinsic" word-prop [
70 [ [ >float ] bi@ double-2-rep (simd-gather-2) \ double-2 boa ]
71 ] [ \ double-2-boa def>> ] if
72 ] "custom-inlining" set-word-prop
76 : float-8-with ( x -- simd-array )
77 [ float-4-with ] [ float-4-with ] bi [ underlying>> ] bi@
78 \ float-8 boa ; inline
80 :: float-8-boa ( a b c d e f g h -- simd-array )
84 \ float-8 boa ; inline
86 : double-4-with ( x -- simd-array )
87 [ double-2-with ] [ double-2-with ] bi [ underlying>> ] bi@
88 \ double-4 boa ; inline
90 :: double-4-boa ( a b c d -- simd-array )
94 \ double-4 boa ; inline
100 ! Filter out operations that are not available, eg horizontal adds
101 ! on SSE2. Fallback code in math.vectors is used in that case.
103 : supported-simd-ops ( assoc -- assoc' )
112 } [ nip "intrinsic" word-prop ] assoc-filter
113 '[ drop _ key? ] assoc-filter ;
115 ! Some SIMD operations are defined in terms of others.
117 :: high-level-ops ( ctor -- assoc )
119 { vneg [ [ dup v- ] keep v- ] }
121 { n+v [ [ ctor execute ] dip v+ ] }
122 { v+n [ ctor execute v+ ] }
123 { n-v [ [ ctor execute ] dip v- ] }
124 { v-n [ ctor execute v- ] }
125 { n*v [ [ ctor execute ] dip v* ] }
126 { v*n [ ctor execute v* ] }
127 { n/v [ [ ctor execute ] dip v/ ] }
128 { v/n [ ctor execute v/ ] }
129 { norm-sq [ dup v. assert-positive ] }
130 { norm [ norm-sq sqrt ] }
131 { normalize [ dup norm v/n ] }
132 { distance [ v- norm ] }
135 :: simd-vector-words ( class ctor elt-type assoc -- )
136 class elt-type assoc supported-simd-ops ctor high-level-ops assoc-union
137 specialize-vector-words ;
141 \ float-4 \ float-4-with m:float H{
142 { v+ [ [ (simd-v+) ] float-4-vv->v-op ] }
143 { v- [ [ (simd-v-) ] float-4-vv->v-op ] }
144 { v* [ [ (simd-v*) ] float-4-vv->v-op ] }
145 { v/ [ [ (simd-v/) ] float-4-vv->v-op ] }
146 { vmin [ [ (simd-vmin) ] float-4-vv->v-op ] }
147 { vmax [ [ (simd-vmax) ] float-4-vv->v-op ] }
148 { sum [ [ (simd-sum) ] float-4-v->n-op ] }
151 \ double-2 \ double-2-with m:float H{
152 { v+ [ [ (simd-v+) ] double-2-vv->v-op ] }
153 { v- [ [ (simd-v-) ] double-2-vv->v-op ] }
154 { v* [ [ (simd-v*) ] double-2-vv->v-op ] }
155 { v/ [ [ (simd-v/) ] double-2-vv->v-op ] }
156 { vmin [ [ (simd-vmin) ] double-2-vv->v-op ] }
157 { vmax [ [ (simd-vmax) ] double-2-vv->v-op ] }
158 { sum [ [ (simd-sum) ] double-2-v->n-op ] }
161 \ float-8 \ float-8-with m:float H{
162 { v+ [ [ (simd-v+) ] float-8-vv->v-op ] }
163 { v- [ [ (simd-v-) ] float-8-vv->v-op ] }
164 { v* [ [ (simd-v*) ] float-8-vv->v-op ] }
165 { v/ [ [ (simd-v/) ] float-8-vv->v-op ] }
166 { vmin [ [ (simd-vmin) ] float-8-vv->v-op ] }
167 { vmax [ [ (simd-vmax) ] float-8-vv->v-op ] }
168 { sum [ [ (simd-sum) ] [ + ] float-8-v->n-op ] }
171 \ double-4 \ double-4-with m:float H{
172 { v+ [ [ (simd-v+) ] double-4-vv->v-op ] }
173 { v- [ [ (simd-v-) ] double-4-vv->v-op ] }
174 { v* [ [ (simd-v*) ] double-4-vv->v-op ] }
175 { v/ [ [ (simd-v/) ] double-4-vv->v-op ] }
176 { vmin [ [ (simd-vmin) ] double-4-vv->v-op ] }
177 { vmax [ [ (simd-vmax) ] double-4-vv->v-op ] }
178 { sum [ [ (simd-v+) ] [ (simd-sum) ] double-4-v->n-op ] }
185 "math.vectors.simd.alien" require