} available-reps ;
! M:: x86 %broadcast-vector ( dst src rep -- )
-! rep unsign-rep {
+! rep signed-rep {
! { float-4-rep [
! dst src float-4-rep %copy
! dst dst { 0 0 0 0 } SHUFPS
! } available-reps ;
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
- rep unsign-rep {
+ rep signed-rep {
{ float-4-rep [
dst src1 float-4-rep %copy
dst src2 UNPCKLPS
} available-reps ;
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
- rep unsign-rep {
+ rep signed-rep {
{ double-2-rep [
dst src1 double-2-rep %copy
dst src2 MOVLHPS
M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
dst src rep %copy
- dst shuffle rep unsign-rep {
+ dst shuffle rep signed-rep {
{ double-2-rep [ >float-4-shuffle float-4-shuffle ] }
{ float-4-rep [ float-4-shuffle ] }
{ int-4-rep [ int-4-shuffle ] }
M: x86 %merge-vector-head
[ two-operand ] keep
- unsign-rep {
+ signed-rep {
{ double-2-rep [ MOVLHPS ] }
{ float-4-rep [ UNPCKLPS ] }
{ longlong-2-rep [ PUNPCKLQDQ ] }
M: x86 %merge-vector-tail
[ two-operand ] keep
- unsign-rep {
+ signed-rep {
{ double-2-rep [ UNPCKHPD ] }
{ float-4-rep [ UNPCKHPS ] }
{ longlong-2-rep [ PUNPCKHQDQ ] }
! x86 method for %unsigned-pack-vector.
! Runs two-operand while keeping the rep on the stack, then dispatches on the
! normalized rep to pick the instruction: PACKUSDW for int-4-rep, PACKUSWB for
! short-8-rep.
! This change swaps the normalizing word from unsign-rep to signed-rep.
! NOTE(review): presumably because the case keys below are the signed rep
! names -- confirm against the definitions of unsign-rep / signed-rep.
M: x86 %unsigned-pack-vector
[ two-operand ] keep
- unsign-rep {
+ signed-rep {
{ int-4-rep [ PACKUSDW ] }
{ short-8-rep [ PACKUSWB ] }
} case ;
} case ;
:: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
- rep unsign-rep :> rep'
+ rep signed-rep :> rep'
dst src rep' {
{ longlong-2-rep [ int64 call ] }
{ int-4-rep [ int32 call ] }
! x86 method for %dot-vector.
! Runs two-operand while keeping the rep, then dispatches on the rep.
! Removed lines: each branch tested sse4.1? and either emitted DPPS/DPPD with
! the immediate HEX: ff, or fell back to MULPS/MULPD followed by
! %horizontal-add-vector on the result.
! Added lines: DPPS (float-4-rep) and DPPD (double-2-rep) are emitted
! unconditionally with the immediate HEX: ff; the fallback path is gone.
! NOTE(review): the unconditional form relies on this method only being
! selected when sse4.1? holds -- see the sse4.1? test in %dot-vector-reps.
M: x86 %dot-vector
[ two-operand ] keep
{
- { float-4-rep [
- sse4.1?
- [ HEX: ff DPPS ]
- [ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ]
- if
- ] }
- { double-2-rep [
- sse4.1?
- [ HEX: ff DPPD ]
- [ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ]
- if
- ] }
+ { float-4-rep [ HEX: ff DPPS ] }
+ { double-2-rep [ HEX: ff DPPD ] }
} case ;
! Lists the reps for which the x86 backend provides %dot-vector:
! float-4-rep and double-2-rep, guarded by a CPU feature test.
! This change raises the feature test from sse3? to sse4.1?.
M: x86 %dot-vector-reps
{
- { sse3? { float-4-rep double-2-rep } }
+ { sse4.1? { float-4-rep double-2-rep } }
} available-reps ;
! x86 method for %horizontal-add-vector.
! Removed form, ( dst src rep -- ): dispatched on rep directly; float-4-rep did
! a %copy followed by two HADDPS, double-2-rep a %copy followed by one HADDPD.
! Added form, ( dst src1 src2 rep -- ): runs two-operand while keeping the rep,
! normalizes it with signed-rep, and emits a single instruction per rep:
! HADDPS (float-4-rep), HADDPD (double-2-rep), PHADDD (int-4-rep),
! PHADDW (short-8-rep).
! NOTE(review): the stack effect changes from three to four inputs, so callers
! of the old form must be updated in the same change -- confirm.
-M: x86 %horizontal-add-vector ( dst src rep -- )
- {
- { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
- { double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
+M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ signed-rep {
+ { float-4-rep [ HADDPS ] }
+ { double-2-rep [ HADDPD ] }
+ { int-4-rep [ PHADDD ] }
+ { short-8-rep [ PHADDW ] }
} case ;
! Lists the reps for which the x86 backend provides %horizontal-add-vector,
! grouped by the CPU feature test that guards them.
! sse3? covers float-4-rep and double-2-rep; this change adds an ssse3? entry
! covering int-4-rep, uint-4-rep, short-8-rep and ushort-8-rep.
M: x86 %horizontal-add-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
+ { ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
! Lists the reps for which the x86 backend provides %horizontal-shl-vector-imm,
! all guarded by sse2?.
! This change appends float-4-rep and double-2-rep to the existing integer reps.
M: x86 %horizontal-shl-vector-imm-reps
{
- { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
} available-reps ;
M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
! Lists the reps for which the x86 backend provides %horizontal-shr-vector-imm,
! all guarded by sse2?.
! This change appends float-4-rep and double-2-rep to the existing integer reps.
M: x86 %horizontal-shr-vector-imm-reps
{
- { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
} available-reps ;
M: x86 %abs-vector ( dst src rep -- )
! x86 method for %integer>scalar: discards the top stack item and emits MOVD
! with the remaining operands.
! NOTE(review): the dropped item is presumably the rep, by analogy with
! %scalar>integer ( dst src rep -- ) -- confirm against the generic's stack effect.
M: x86 %integer>scalar drop MOVD ;
+! XXX the longlong versions won't work on x86.32
M:: x86 %scalar>integer ( dst src rep -- )
rep {
+ { longlong-scalar-rep [
+ dst src MOVD
+ ] }
+ { ulonglong-scalar-rep [
+ dst src MOVD
+ ] }
{ int-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 32-bit-version-of
#! set up by the caller.
stack-frame get total-size>> + stack@ ;
! Run of enable-* calls; this change removes the enable-simd call and keeps
! enable-min/max and enable-fixnum-log2.
! NOTE(review): what each call enables, and whether enable-simd is now invoked
! elsewhere, is not visible from here -- confirm.
-enable-simd
enable-min/max
enable-fixnum-log2