! Emits the x86 dot-product instruction for the given vector rep.
! After this change each rep maps directly to DPPS / DPPD with an
! immediate of HEX: ff. The removed lines chose that form only when
! sse4.1? held, and otherwise emitted MULPS / MULPD followed by
! %horizontal-add-vector on the destination.
! NOTE(review): two-operand is defined elsewhere; presumably it arranges
! dst/src1/src2 for a destructive two-operand instruction -- confirm.
M: x86 %dot-vector
[ two-operand ] keep
{
- { float-4-rep [
- sse4.1?
- [ HEX: ff DPPS ]
- [ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ]
- if
- ] }
- { double-2-rep [
- sse4.1?
- [ HEX: ff DPPD ]
- [ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ]
- if
- ] }
+ { float-4-rep [ HEX: ff DPPS ] }
+ { double-2-rep [ HEX: ff DPPD ] }
} case ;
! Lists the reps for which %dot-vector is offered on x86.
! The gate moves from sse3? to sse4.1?, which matches a %dot-vector that
! only emits DPPS / DPPD and no longer has a multiply + horizontal-add
! fallback.
M: x86 %dot-vector-reps
{
- { sse3? { float-4-rep double-2-rep } }
+ { sse4.1? { float-4-rep double-2-rep } }
} available-reps ;
! Horizontal add of vector elements.
! The stack effect changes from ( dst src rep -- ) to
! ( dst src1 src2 rep -- ). The removed version copied src into dst and
! then applied HADDPS twice (HADDPD once for double-2-rep); the new
! version emits a single instruction per call and adds PHADDD / PHADDW
! for int-4-rep and short-8-rep.
! NOTE(review): unsign-rep is defined elsewhere; presumably it maps
! unsigned reps onto their signed counterparts so the case below also
! covers uint-4-rep / ushort-8-rep -- confirm.
! NOTE(review): callers written against the old two-input stack effect
! must be updated to pass src1 and src2 -- verify all call sites.
-M: x86 %horizontal-add-vector ( dst src rep -- )
- {
- { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
- { double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
+M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ unsign-rep {
+ { float-4-rep [ HADDPS ] }
+ { double-2-rep [ HADDPD ] }
+ { int-4-rep [ PHADDD ] }
+ { short-8-rep [ PHADDW ] }
} case ;
! Lists the reps for which %horizontal-add-vector is offered on x86.
! Float reps stay gated on sse3?; the added row offers the 4 x 32-bit and
! 8 x 16-bit integer reps (signed and unsigned) when ssse3? holds.
M: x86 %horizontal-add-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
+ { ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
! Lists the reps for which %horizontal-shl-vector-imm is offered on x86.
! The sse2? row is extended with float-4-rep and double-2-rep in addition
! to the integer reps already listed.
M: x86 %horizontal-shl-vector-imm-reps
{
- { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
} available-reps ;
M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
! Lists the reps for which %horizontal-shr-vector-imm is offered on x86.
! The sse2? row is extended with float-4-rep and double-2-rep, mirroring
! the rep list used for %horizontal-shl-vector-imm-reps.
M: x86 %horizontal-shr-vector-imm-reps
{
- { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
} available-reps ;
M: x86 %abs-vector ( dst src rep -- )
#! set up by the caller.
stack-frame get total-size>> + stack@ ;
! Top-level feature enabling. This change removes the enable-simd call
! here; enable-min/max and enable-fixnum-log2 are kept.
! NOTE(review): confirm enable-simd is now invoked elsewhere, or that
! dropping it is intentional.
-enable-simd
enable-min/max
enable-fixnum-log2