1 ! Copyright (C) 2009 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: alien alien.c-types alien.data assocs combinators
4 cpu.architecture compiler.cfg.comparisons fry generalizations
5 kernel libc macros math
6 math.vectors.conversion.backend
7 sequences sets effects accessors namespaces
8 lexer parser vocabs.parser words arrays math.vectors ;
9 IN: math.vectors.simd.intrinsics
11 ERROR: bad-simd-call word ;
15 : simd-effect ( word -- effect )
16 stack-effect [ in>> "rep" suffix ] [ out>> ] bi <effect> ;
17 : simd-conversion-effect ( word -- effect )
18 stack-effect [ in>> but-last "rep" suffix ] [ out>> ] bi <effect> ;
22 V{ } clone simd-ops set-global
24 : (SIMD-OP:) ( accum quot -- accum )
26 scan-word dup name>> "(simd-" ")" surround create-in
27 [ nip dup '[ _ bad-simd-call ] define ]
29 '[ _ dip set-stack-effect ]
30 [ 2array simd-ops get push ]
34 [ simd-effect ] (SIMD-OP:) ;
36 SYNTAX: SIMD-CONVERSION-OP:
37 [ simd-conversion-effect ] (SIMD-OP:) ;
70 SIMD-OP: vshuffle-elements
71 SIMD-OP: vshuffle-bytes
72 SIMD-OP: (vmerge-head)
73 SIMD-OP: (vmerge-tail)
84 SIMD-CONVERSION-OP: (v>float)
85 SIMD-CONVERSION-OP: (v>integer)
86 SIMD-CONVERSION-OP: (vpack-signed)
87 SIMD-CONVERSION-OP: (vpack-unsigned)
88 SIMD-CONVERSION-OP: (vunpack-head)
89 SIMD-CONVERSION-OP: (vunpack-tail)
91 : (simd-with) ( x rep -- v ) bad-simd-call ;
92 : (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
93 : (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
94 : (simd-select) ( v n rep -- x ) bad-simd-call ;
96 : assert-positive ( x -- y ) ;
98 : alien-vector ( c-ptr n rep -- value )
99 ! Inefficient version for when intrinsics are missing
100 [ swap <displaced-alien> ] dip rep-size memory>byte-array ;
102 : set-alien-vector ( value c-ptr n rep -- )
103 ! Inefficient version for when intrinsics are missing
104 [ swap <displaced-alien> swap ] dip rep-size memcpy ;
108 : rep-components ( rep -- n )
109 16 swap rep-component-type heap-size /i ; foldable
111 : rep-coercer ( rep -- quot )
113 { [ dup int-vector-rep? ] [ [ >fixnum ] ] }
114 { [ dup float-vector-rep? ] [ [ >float ] ] }
115 } cond nip ; foldable
117 : rep-coerce ( value rep -- value' )
118 rep-coercer call( value -- value' ) ; inline
120 CONSTANT: rep-gather-words
122 { 2 (simd-gather-2) }
123 { 4 (simd-gather-4) }
126 : rep-gather-word ( rep -- word )
127 rep-components rep-gather-words at ;
131 MACRO: (simd-boa) ( rep -- quot )
138 '[ _ _ napply _ _ execute ] ;
140 GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
142 : (%unpack-reps) ( -- reps )
143 %merge-vector-reps [ int-vector-rep? ] filter
144 %unpack-vector-head-reps union ;
146 : (%abs-reps) ( -- reps )
147 cc> %compare-vector-reps [ int-vector-rep? ] filter
148 %xor-vector-reps [ float-vector-rep? ] filter
150 [ { } ] [ { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ] if-empty ;
152 : (%shuffle-imm-reps) ( -- reps )
153 %shuffle-vector-reps %shuffle-vector-imm-reps union ;
155 M: vector-rep supported-simd-op?
157 { \ (simd-v+) [ %add-vector-reps ] }
158 { \ (simd-vs+) [ %saturated-add-vector-reps ] }
159 { \ (simd-v+-) [ %add-sub-vector-reps ] }
160 { \ (simd-v-) [ %sub-vector-reps ] }
161 { \ (simd-vs-) [ %saturated-sub-vector-reps ] }
162 { \ (simd-vneg) [ %sub-vector-reps ] }
163 { \ (simd-v*) [ %mul-vector-reps ] }
164 { \ (simd-vs*) [ %saturated-mul-vector-reps ] }
165 { \ (simd-v/) [ %div-vector-reps ] }
166 { \ (simd-vmin) [ %min-vector-reps ] }
167 { \ (simd-vmax) [ %max-vector-reps ] }
168 { \ (simd-v.) [ %dot-vector-reps ] }
169 { \ (simd-vsqrt) [ %sqrt-vector-reps ] }
170 { \ (simd-sum) [ %horizontal-add-vector-reps ] }
171 { \ (simd-vabs) [ (%abs-reps) ] }
172 { \ (simd-vbitand) [ %and-vector-reps ] }
173 { \ (simd-vbitandn) [ %andn-vector-reps ] }
174 { \ (simd-vbitor) [ %or-vector-reps ] }
175 { \ (simd-vbitxor) [ %xor-vector-reps ] }
176 { \ (simd-vbitnot) [ %xor-vector-reps ] }
177 { \ (simd-vand) [ %and-vector-reps ] }
178 { \ (simd-vandn) [ %andn-vector-reps ] }
179 { \ (simd-vor) [ %or-vector-reps ] }
180 { \ (simd-vxor) [ %xor-vector-reps ] }
181 { \ (simd-vnot) [ %xor-vector-reps ] }
182 { \ (simd-vlshift) [ %shl-vector-reps ] }
183 { \ (simd-vrshift) [ %shr-vector-reps ] }
184 { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
185 { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
186 { \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] }
187 { \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] }
188 { \ (simd-(vmerge-head)) [ %merge-vector-reps ] }
189 { \ (simd-(vmerge-tail)) [ %merge-vector-reps ] }
190 { \ (simd-(v>float)) [ %integer>float-vector-reps ] }
191 { \ (simd-(v>integer)) [ %float>integer-vector-reps ] }
192 { \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] }
193 { \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
194 { \ (simd-(vunpack-head)) [ (%unpack-reps) ] }
195 { \ (simd-(vunpack-tail)) [ (%unpack-reps) ] }
196 { \ (simd-v<=) [ cc<= %compare-vector-reps ] }
197 { \ (simd-v<) [ cc< %compare-vector-reps ] }
198 { \ (simd-v=) [ cc= %compare-vector-reps ] }
199 { \ (simd-v>) [ cc> %compare-vector-reps ] }
200 { \ (simd-v>=) [ cc>= %compare-vector-reps ] }
201 { \ (simd-vunordered?) [ cc/<>= %compare-vector-reps ] }
202 { \ (simd-gather-2) [ %gather-vector-2-reps ] }
203 { \ (simd-gather-4) [ %gather-vector-4-reps ] }
204 { \ (simd-vany?) [ %test-vector-reps ] }
205 { \ (simd-vall?) [ %test-vector-reps ] }
206 { \ (simd-vnone?) [ %test-vector-reps ] }