4 "double" c-type (>>align)
] unless
-USING: cpu.x86.features cpu.x86.features.private ;
-
-"-no-sse2" (command-line) member? [
- [ { check_sse2 } compile ] with-optimizer
-
- "Checking if your CPU supports SSE2..." print flush
- sse2? [
- " - yes" print
- enable-sse2
- [
- sse2? [
- "This image was built to use SSE2, which your CPU does not support." print
- "You will need to bootstrap Factor again." print
- flush
- 1 exit
- ] unless
- ] "cpu.x86" add-init-hook
- ] [ " - no" print ] if
-] unless
+USE: vocabs.loader
+
+"cpu.x86.features" require
! Enable fast calling of libc math functions
enable-float-functions
-! SSE2 is always available on x86-64.
-enable-sse2
-
USE: vocabs.loader
{
{ [ os unix? ] [ "cpu.x86.64.unix" require ] }
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
} cond
+
+"cpu.x86.features" require
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: system kernel math alien.syntax ;
+USING: system kernel math alien.syntax combinators locals init io
+cpu.x86 compiler compiler.units accessors ;
IN: cpu.x86.features
<PRIVATE
-FUNCTION: bool check_sse2 ( ) ;
+FUNCTION: int sse_version ( ) ;
FUNCTION: longlong read_timestamp_counter ( ) ;
PRIVATE>
-HOOK: sse2? cpu ( -- ? )
+ALIAS: sse-version sse_version
-M: x86.32 sse2? check_sse2 ;
-
-M: x86.64 sse2? t ;
+! Maps an integer SSE version code (as produced by sse-version) to a
+! human-readable name. Only the codes listed in the table are handled;
+! the case form below has no default branch.
+: sse-string ( version -- string )
+ {
+ { 00 [ "no SSE" ] }
+ { 10 [ "SSE1" ] }
+ { 20 [ "SSE2" ] }
+ { 30 [ "SSE3" ] }
+ { 33 [ "SSSE3" ] }
+ { 41 [ "SSE4.1" ] }
+ { 42 [ "SSE4.2" ] }
+ } case ;
HOOK: instruction-count cpu ( -- n )
: count-instructions ( quot -- n )
instruction-count [ call ] dip instruction-count swap - ; inline
+
+USING: cpu.x86.features cpu.x86.features.private ;
+
+! Registers an init hook named "cpu.x86" that guards against running on a
+! CPU with less SSE support than `version` (the value passed in here and
+! captured by the hook quotation). When the hook runs it re-reads
+! sse-version; if that is lower than `version` it prints what the image
+! expects versus what the CPU reports, flushes output and exits with
+! status 1. Presumably the hook runs at image startup -- confirm against
+! add-init-hook.
+:: install-sse-check ( version -- )
+ [
+ sse-version version < [
+ "This image was built to use " write
+ version sse-string write
+ " but your CPU supports " write
+ sse-version sse-string write "." print
+ "You will need to bootstrap Factor again." print
+ flush
+ 1 exit
+ ] when
+ ] "cpu.x86" add-init-hook ;
+
+! Dispatches on an SSE version code: 00 and 10 enable nothing, 20 calls
+! enable-sse2, and 30, 33, 41 and 42 all call enable-sse3. Codes outside
+! this table are not handled (no default branch).
+: enable-sse ( version -- )
+ {
+ { 00 [ ] }
+ { 10 [ ] }
+ { 20 [ enable-sse2 ] }
+ { 30 [ enable-sse3 ] }
+ { 33 [ enable-sse3 ] }
+ { 41 [ enable-sse3 ] }
+ { 42 [ enable-sse3 ] }
+ } case ;
+
+! Compile the sse_version binding with the optimizer before it is called
+! on the lines below.
+[ { sse_version } compile ] with-optimizer
+
+! Query the CPU once and apply three quotations to the same value (tri):
+! print the detected level, install the version-check hook, and enable the
+! matching code generation features.
+"Checking for multimedia extensions: " write sse-version
+[ sse-string write " detected" print ]
+[ install-sse-check ]
+[ enable-sse ] tri
: enable-sse2 ( -- )
enable-float-intrinsics
enable-fsqrt
- enable-float-min/max ;
+ enable-float-min/max
+ enable-sse2-simd ;
+
+! SSE3 support is layered on top of SSE2: run everything enable-sse2 does
+! first, then call enable-sse3-simd.
+: enable-sse3 ( -- )
+ enable-sse2
+ enable-sse3-simd ;
enable-min/max
generalizations kernel math math.functions math.vectors
math.vectors.simd.functor math.vectors.specialization parser
prettyprint.custom sequences sequences.private
-specialized-arrays.double locals assocs literals ;
+specialized-arrays.double locals assocs literals words fry ;
IN: math.vectors.simd
<PRIVATE
<PRIVATE
-:: simd-vector-words ( class ctor elt-type assoc -- )
- class elt-type assoc {
+! Filters the input assoc down to the operations that are usable here.
+! Step 1: from the table of vector word -> (simd-...) primitive pairs, keep
+! only the pairs whose primitive has an "intrinsic" word property set.
+! Step 2: keep only those entries of the input assoc whose key is still
+! present in that reduced table (the table is spliced into the quotation
+! by fry's _ placeholder).
+: supported-simd-ops ( assoc -- assoc' )
+ {
+ { v+ (simd-v+) }
+ { v- (simd-v-) }
+ { v* (simd-v*) }
+ { v/ (simd-v/) }
+ { vmin (simd-vmin) }
+ { vmax (simd-vmax) }
+ { sum (simd-sum) }
+ } [ nip "intrinsic" word-prop ] assoc-filter
+ '[ drop _ key? ] assoc-filter ;
+
+:: high-level-ops ( ctor -- assoc )
+ {
{ vneg [ [ dup v- ] keep v- ] }
{ v. [ v* sum ] }
{ n+v [ [ ctor execute ] dip v+ ] }
{ norm [ norm-sq sqrt ] }
{ normalize [ dup norm v/n ] }
{ distance [ v- norm ] }
- } assoc-union
+ } ;
+
+:: simd-vector-words ( class ctor elt-type assoc -- )
+ class elt-type assoc supported-simd-ops ctor high-level-ops assoc-union
specialize-vector-words ;
PRIVATE>
add $12,%esp /* pop args from the stack */
ret /* return _with new stack_ */
-/* cpu.x86.32 calls this */
-DEF(bool,check_sse2,(void)):
- push %ebx
- mov $1,%eax
- cpuid
- shr $26,%edx
- and $1,%edx
- pop %ebx
- mov %edx,%eax
- ret
-
DEF(long long,read_timestamp_counter,(void)):
rdtsc
ret
#ifdef WINDOWS
.section .drectve
- .ascii " -export:check_sse2"
.ascii " -export:read_timestamp_counter"
#endif
add $STACK_PADDING,STACK_REG
jmp *QUOT_XT_OFFSET(ARG0) /* Call the quotation */
+/* cpu.x86.features calls this.
+
+   int sse_version(void)
+
+   Executes CPUID leaf 1 and returns the highest SSE level the CPU
+   advertises, encoded as one of: 0 (none), 10 (SSE), 20 (SSE2), 30 (SSE3),
+   33 (SSSE3), 41 (SSE4.1), 42 (SSE4.2). Levels are probed from newest to
+   oldest, so the first feature bit found decides the result.
+
+   CPUID overwrites EAX, EBX, ECX and EDX. EBX/RBX is callee-saved in the C
+   calling conventions, so it is saved before CPUID and restored immediately
+   afterwards; only ECX and EDX are inspected from then on. */
+DEF(int,sse_version,(void)):
+#ifdef __x86_64__
+ mov %rbx,%r8 /* r8 is caller-saved and not written by cpuid */
+#else
+ push %ebx /* no spare caller-saved register on 32-bit */
+#endif
+ mov $0x1,RETURN_REG /* CPUID leaf 1: feature information */
+ cpuid
+#ifdef __x86_64__
+ mov %r8,%rbx
+#else
+ pop %ebx
+#endif
+ test $0x100000,%ecx /* ECX bit 20: SSE4.2 */
+ jnz sse_42
+ test $0x80000,%ecx /* ECX bit 19: SSE4.1 */
+ jnz sse_41
+ test $0x200,%ecx /* ECX bit 9: SSSE3 */
+ jnz ssse_3
+ test $0x1,%ecx /* ECX bit 0: SSE3 */
+ jnz sse_3
+ test $0x4000000,%edx /* EDX bit 26: SSE2 */
+ jnz sse_2
+ test $0x2000000,%edx /* EDX bit 25: SSE */
+ jnz sse_1
+ mov $0,RETURN_REG /* no SSE feature bit set */
+ ret
+sse_42:
+ mov $42,RETURN_REG
+ ret
+sse_41:
+ mov $41,RETURN_REG
+ ret
+ssse_3:
+ mov $33,RETURN_REG
+ ret
+sse_3:
+ mov $30,RETURN_REG
+ ret
+sse_2:
+ mov $20,RETURN_REG
+ ret
+sse_1:
+ mov $10,RETURN_REG
+ ret
#ifdef WINDOWS
.section .drectve
+ .ascii " -export:sse_version"
.ascii " -export:c_to_factor"
#endif