1 ! Copyright (C) 2009 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors assocs byte-arrays classes classes.algebra effects fry
4 functors generalizations kernel literals locals math math.functions
5 math.vectors math.vectors.private math.vectors.simd.intrinsics
6 math.vectors.conversion.backend
7 math.vectors.specialization parser prettyprint.custom sequences
8 sequences.private strings words definitions macros cpu.architecture
9 namespaces arrays quotations combinators combinators.short-circuit sets
11 QUALIFIED-WITH: alien.c-types c
12 QUALIFIED: math.private
13 IN: math.vectors.simd.functor
15 ERROR: bad-length got expected ;
17 : vector-true-value ( class -- value )
19 { [ dup integer class<= ] [ drop -1 ] }
20 { [ dup float class<= ] [ drop -1 bits>double ] }
23 : vector-false-value ( class -- value )
25 { [ dup integer class<= ] [ drop 0 ] }
26 { [ dup float class<= ] [ drop 0.0 ] }
29 : boolean>element ( bool/elt class -- elt )
31 { t [ vector-true-value ] }
32 { f [ vector-false-value ] }
36 MACRO: simd-boa ( rep class -- simd-array )
37 [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
39 : can-be-unboxed? ( type -- ? )
41 { c:float [ \ math.private:float+ "intrinsic" word-prop ] }
42 { c:double [ \ math.private:float+ "intrinsic" word-prop ] }
43 [ c:heap-size cell < ]
46 : simd-boa-fast? ( rep -- ? )
47 [ dup rep-gather-word supported-simd-op? ]
48 [ rep-component-type can-be-unboxed? ]
51 :: define-boa-custom-inlining ( word rep class -- )
55 [ rep (simd-boa) class boa ]
57 ] "custom-inlining" set-word-prop ;
59 : simd-with ( rep class x -- simd-array )
60 [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
62 : simd-with/nth-fast? ( rep -- ? )
63 [ \ (simd-vshuffle-elements) supported-simd-op? ]
64 [ rep-component-type can-be-unboxed? ]
67 :: define-with-custom-inlining ( word rep class -- )
70 rep simd-with/nth-fast? [
71 [ rep rep-coerce rep (simd-with) class boa ]
73 ] "custom-inlining" set-word-prop ;
75 : simd-nth-fast ( rep -- quot )
76 [ rep-components ] keep
77 '[ swap _ '[ _ _ (simd-select) ] 2array ] map-index
78 '[ swap >fixnum _ case ] ;
80 : simd-nth-slow ( rep -- quot )
81 rep-component-type dup c:c-type-getter-boxer c:array-accessor ;
83 MACRO: simd-nth ( rep -- x )
84 dup simd-with/nth-fast? [ simd-nth-fast ] [ simd-nth-slow ] if ;
86 : boa-effect ( rep n -- effect )
87 [ rep-components ] dip *
88 [ CHAR: a + 1string ] map
89 { "simd-vector" } <effect> ;
91 : supported-simd-ops ( assoc rep -- assoc' )
93 '[ nip _ swap supported-simd-op? ] assoc-filter
94 '[ drop _ key? ] assoc-filter ;
96 ERROR: bad-schema op schema ;
98 :: op-wrapper ( op specials schemas -- wrapper )
101 [ word-schema schemas at ]
102 [ dup word-schema bad-schema ]
105 : low-level-ops ( simd-ops specials schemas -- alist )
106 '[ 1quotation over _ _ op-wrapper [ ] 2sequence ] assoc-map ;
108 :: high-level-ops ( ctor elt-class -- assoc )
109 ! Some SIMD operations are defined in terms of others.
111 { vbroadcast [ swap nth ctor execute ] }
112 { n+v [ [ ctor execute ] dip v+ ] }
113 { v+n [ ctor execute v+ ] }
114 { n-v [ [ ctor execute ] dip v- ] }
115 { v-n [ ctor execute v- ] }
116 { n*v [ [ ctor execute ] dip v* ] }
117 { v*n [ ctor execute v* ] }
118 { n/v [ [ ctor execute ] dip v/ ] }
119 { v/n [ ctor execute v/ ] }
120 { norm-sq [ dup v. assert-positive ] }
121 { norm [ norm-sq sqrt ] }
122 { normalize [ dup norm v/n ] }
124 ! To compute dot product and distance with integer vectors, we
125 ! have to do things less efficiently, with integer overflow checks,
126 ! in the general case.
127 elt-class float = [ { distance [ v- norm ] } suffix ] when ;
129 TUPLE: simd class elt-class ops special-wrappers schema-wrappers ctor rep ;
131 : define-simd ( simd -- )
132 dup rep>> rep-component-type c:c-type-boxed-class >>elt-class
136 [ [ ops>> ] [ special-wrappers>> ] [ schema-wrappers>> ] tri low-level-ops ]
137 [ rep>> supported-simd-ops ]
138 [ [ ctor>> ] [ elt-class>> ] bi high-level-ops assoc-union ]
140 specialize-vector-words ;
142 :: define-simd-128-type ( class rep -- )
146 [ rep alien-vector class boa ] >>getter
147 [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
153 : (define-simd-128) ( simd -- )
156 [ [ class>> ] [ rep>> ] bi define-simd-128-type ] bi ;
158 FUNCTOR: define-simd-128 ( T -- )
160 N [ 16 T c:heap-size /i ]
162 A DEFINES-CLASS ${T}-${N}
163 A-boa DEFINES ${A}-boa
164 A-with DEFINES ${A}-with
165 A-cast DEFINES ${A}-cast
169 SET-NTH [ T dup c:c-setter c:array-accessor ]
171 A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
172 A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
173 A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
174 A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op
175 A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
176 A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
177 A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op
178 A-vv-conversion-op DEFINES-PRIVATE ${A}-vv-conversion-op
180 A-element-class [ A-rep rep-component-type c:c-type-boxed-class ]
185 { underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
189 M: A clone underlying>> clone \ A boa ; inline
191 M: A length drop N ; inline
194 over \ A instance? [ v= vall? ] [ 2drop f ] if ;
196 M: A nth-unsafe underlying>> A-rep simd-nth ; inline
199 [ A-element-class boolean>element ] 2dip
200 underlying>> SET-NTH call ; inline
202 : >A ( seq -- simd-array ) \ A new clone-like ;
204 M: A like drop dup \ A instance? [ >A ] unless ; inline
206 M: A new-underlying drop \ A boa ; inline
210 [ drop 16 <byte-array> \ A boa ]
214 M: A c:byte-length underlying>> length ; inline
216 M: A element-type drop A-rep rep-component-type ;
218 M: A pprint-delims drop \ A{ \ } ;
220 M: A >pprint-sequence ;
222 M: A pprint* pprint-object ;
224 SYNTAX: A{ \ } [ >A ] parse-literal ;
226 : A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ;
228 \ A-with \ A-rep \ A define-with-custom-inlining
230 \ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared
232 \ A-rep rep-gather-word [
233 \ A-boa \ A-rep \ A define-boa-custom-inlining
236 : A-cast ( simd-array -- simd-array' )
237 underlying>> \ A boa ; inline
243 : A-vv->v-op ( v1 v2 quot -- v3 )
244 [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
246 : A-vn->v-op ( v1 v2 quot -- v3 )
247 [ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline
249 : A-vv->n-op ( v1 v2 quot -- n )
250 [ [ underlying>> ] bi@ A-rep ] dip call ; inline
252 : A-v->v-op ( v1 quot -- v2 )
253 [ underlying>> A-rep ] dip call \ A boa ; inline
255 : A-v->n-op ( v quot -- n )
256 [ underlying>> A-rep ] dip call ; inline
258 : A-v-conversion-op ( v1 to-type quot -- v2 )
259 swap [ underlying>> A-rep ] [ call ] [ '[ _ boa ] call( u -- v ) ] tri* ; inline
261 : A-vv-conversion-op ( v1 v2 to-type quot -- v2 )
264 [ underlying>> A-rep ]
266 [ '[ _ boa ] call( u -- v ) ]
274 { (v>float) A-v-conversion-op }
275 { (v>integer) A-v-conversion-op }
276 { (vpack-signed) A-vv-conversion-op }
277 { (vpack-unsigned) A-vv-conversion-op }
278 { (vunpack-head) A-v-conversion-op }
279 { (vunpack-tail) A-v-conversion-op }
282 { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
283 { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
284 { { +vector+ +literal+ -> +vector+ } A-vn->v-op }
285 { { +vector+ +vector+ -> +scalar+ } A-vv->n-op }
286 { { +vector+ -> +vector+ } A-v->v-op }
287 { { +vector+ -> +scalar+ } A-v->n-op }
288 { { +vector+ -> +nonnegative+ } A-v->n-op }
296 ! Synthesize 256-bit vectors from a pair of 128-bit vectors
300 :: define-simd-256-type ( class rep -- )
306 [ 16 + >fixnum rep alien-vector ] 2bi
310 [ [ underlying1>> ] 2dip rep set-alien-vector ]
311 [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
319 : (define-simd-256) ( simd -- )
320 simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops
322 [ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ;
324 FUNCTOR: define-simd-256 ( T -- )
326 N [ 32 T c:heap-size /i ]
330 A/2-boa IS ${A/2}-boa
331 A/2-with IS ${A/2}-with
333 A DEFINES-CLASS ${T}-${N}
334 A-boa DEFINES ${A}-boa
335 A-with DEFINES ${A}-with
336 A-cast DEFINES ${A}-cast
340 A-deref DEFINES-PRIVATE ${A}-deref
342 A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
343 A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
344 A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
345 A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
346 A-v.-op DEFINES-PRIVATE ${A}-v.-op
347 (A-v->n-op) DEFINES-PRIVATE (${A}-v->v-op)
348 A-sum-op DEFINES-PRIVATE ${A}-sum-op
349 A-vany-op DEFINES-PRIVATE ${A}-vany-op
350 A-vall-op DEFINES-PRIVATE ${A}-vall-op
351 A-vmerge-head-op DEFINES-PRIVATE ${A}-vmerge-head-op
352 A-vmerge-tail-op DEFINES-PRIVATE ${A}-vmerge-tail-op
353 A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op
354 A-vpack-op DEFINES-PRIVATE ${A}-vpack-op
355 A-vunpack-head-op DEFINES-PRIVATE ${A}-vunpack-head-op
356 A-vunpack-tail-op DEFINES-PRIVATE ${A}-vunpack-tail-op
364 { underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
365 { underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
370 [ underlying1>> clone ] [ underlying2>> clone ] bi
373 M: A length drop N ; inline
376 over \ A instance? [ v= vall? ] [ 2drop f ] if ;
378 : A-deref ( n seq -- n' seq' )
379 over N/2 < [ underlying1>> ] [ [ N/2 - ] dip underlying2>> ] if \ A/2 boa ; inline
381 M: A nth-unsafe A-deref nth-unsafe ; inline
383 M: A set-nth-unsafe A-deref set-nth-unsafe ; inline
385 : >A ( seq -- simd-array ) \ A new clone-like ;
387 M: A like drop dup \ A instance? [ >A ] unless ; inline
391 [ drop 16 <byte-array> 16 <byte-array> \ A boa ]
395 M: A c:byte-length drop 32 ; inline
397 M: A element-type drop A-rep rep-component-type ;
399 SYNTAX: A{ \ } [ >A ] parse-literal ;
401 M: A pprint-delims drop \ A{ \ } ;
403 M: A >pprint-sequence ;
405 M: A pprint* pprint-object ;
407 : A-with ( x -- simd-array )
408 [ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@
411 : A-boa ( ... -- simd-array )
412 [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
415 \ A-rep 2 boa-effect \ A-boa set-stack-effect
417 : A-cast ( simd-array -- simd-array' )
418 [ underlying1>> ] [ underlying2>> ] bi \ A boa ; inline
422 : A-vv->v-op ( v1 v2 quot -- v3 )
423 [ [ [ underlying1>> ] bi@ A-rep ] dip call ]
424 [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
427 : A-vn->v-op ( v1 v2 quot -- v3 )
428 [ [ [ underlying1>> ] dip A-rep ] dip call ]
429 [ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi
432 : A-v->v-op ( v1 combine-quot -- v2 )
433 [ [ underlying1>> A-rep ] dip call ]
434 [ [ underlying2>> A-rep ] dip call ] 2bi
437 : A-v.-op ( v1 v2 quot -- n )
438 [ [ [ underlying1>> ] bi@ A-rep ] dip call ]
439 [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
442 : (A-v->n-op) ( v1 quot reduce-quot -- n )
443 '[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ A-rep ] dip call ; inline
445 : A-sum-op ( v1 quot -- n )
446 [ (simd-v+) ] (A-v->n-op) ; inline
448 : A-vany-op ( v1 quot -- n )
449 [ (simd-vbitor) ] (A-v->n-op) ; inline
450 : A-vall-op ( v1 quot -- n )
451 [ (simd-vbitand) ] (A-v->n-op) ; inline
453 : A-vmerge-head-op ( v1 v2 quot -- v )
455 [ underlying1>> ] bi@
456 [ A-rep (simd-(vmerge-head)) ]
457 [ A-rep (simd-(vmerge-tail)) ] 2bi
460 : A-vmerge-tail-op ( v1 v2 quot -- v )
462 [ underlying2>> ] bi@
463 [ A-rep (simd-(vmerge-head)) ]
464 [ A-rep (simd-(vmerge-tail)) ] 2bi
467 : A-v-conversion-op ( v1 to-type quot -- v )
469 [ [ underlying1>> A-rep ] dip call ]
470 [ [ underlying2>> A-rep ] dip call ] 2bi
471 ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
473 : A-vpack-op ( v1 v2 to-type quot -- v )
475 '[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ ] bi*
476 ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
478 : A-vunpack-head-op ( v1 to-type quot -- v )
482 [ A-rep (simd-(vunpack-tail)) ] bi
483 ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
485 : A-vunpack-tail-op ( v1 to-type quot -- v )
488 [ A-rep (simd-(vunpack-head)) ]
490 ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
502 { (vmerge-head) A-vmerge-head-op }
503 { (vmerge-tail) A-vmerge-tail-op }
504 { (v>integer) A-v-conversion-op }
505 { (v>float) A-v-conversion-op }
506 { (vpack-signed) A-vpack-op }
507 { (vpack-unsigned) A-vpack-op }
508 { (vunpack-head) A-vunpack-head-op }
509 { (vunpack-tail) A-vunpack-tail-op }
512 { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
513 { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
514 { { +vector+ +literal+ -> +vector+ } A-vn->v-op }
515 { { +vector+ -> +vector+ } A-v->v-op }