compiler: add fixnum-min/max intrinsics; ~10% speedup on benchmark.yuv-to-rgb

author Slava Pestov <slava@slava-pestovs-macbook-pro.local>

Sat, 29 Aug 2009 00:02:59 +0000 (19:02 -0500)

committer Slava Pestov <slava@slava-pestovs-macbook-pro.local>

Sat, 29 Aug 2009 00:02:59 +0000 (19:02 -0500)
author Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Sat, 29 Aug 2009 00:02:59 +0000 (19:02 -0500)
committer Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Sat, 29 Aug 2009 00:02:59 +0000 (19:02 -0500)
diff --git a/basis/compiler/cfg/hats/hats.factor b/basis/compiler/cfg/hats/hats.factor

index de612f2c28f5df6ac2dd902a298f8caacb4325fc..d0b2cd4d9e7ef8c217fa618a7530c2b4ad2d1a6a 100644 (file)
--- a/basis/compiler/cfg/hats/hats.factor
+++ b/basis/compiler/cfg/hats/hats.factor
@@ -35,6 +35,8 @@ IN: compiler.cfg.hats
  : ^^shr-imm ( src1 src2 -- dst ) ^^r2 ##shr-imm ; inline
  : ^^sar ( src1 src2 -- dst ) ^^r2 ##sar ; inline
  : ^^sar-imm ( src1 src2 -- dst ) ^^r2 ##sar-imm ; inline
+: ^^min ( src1 src2 -- dst ) ^^r2 ##min ; inline
+: ^^max ( src1 src2 -- dst ) ^^r2 ##max ; inline
  : ^^not ( src -- dst ) ^^r1 ##not ; inline
  : ^^log2 ( src -- dst ) ^^r1 ##log2 ; inline
  : ^^bignum>integer ( src -- dst ) ^^r1 next-vreg ##bignum>integer ; inline
diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor

index 41e227ed7655b12f64f8ba007f10b7c3765da3c0..9706507193f6a115bcd147dde8eff08ade204f6b 100644 (file)
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@ -91,6 +91,8 @@ INSN: ##shr < ##binary ;
  INSN: ##shr-imm < ##binary-imm ;
  INSN: ##sar < ##binary ;
  INSN: ##sar-imm < ##binary-imm ;
+INSN: ##min < ##binary ;
+INSN: ##max < ##binary ;
  INSN: ##not < ##unary ;
  INSN: ##log2 < ##unary ;
  
diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor

index 17e8a1336d345758b29c65e0d07b285c2ee213aa..562c3ad836fad8a6fc461e22f25b77ea52b417b2 100644 (file)
--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@@ -25,6 +25,9 @@ QUALIFIED: math.floats.private
  QUALIFIED: math.libm
  IN: compiler.cfg.intrinsics
  
+: enable-intrinsics ( words -- )
+    [ t "intrinsic" set-word-prop ] each ;
+
  {
      kernel.private:tag
      kernel.private:getenv
@@ -67,7 +70,7 @@ IN: compiler.cfg.intrinsics
      alien.accessors:set-alien-signed-2
      alien.accessors:alien-cell
      alien.accessors:set-alien-cell
-} [ t "intrinsic" set-word-prop ] each
+} enable-intrinsics
  
  : enable-alien-4-intrinsics ( -- )
      {
@@ -75,7 +78,7 @@ IN: compiler.cfg.intrinsics
          alien.accessors:set-alien-unsigned-4
          alien.accessors:alien-signed-4
          alien.accessors:set-alien-signed-4
-    } [ t "intrinsic" set-word-prop ] each ;
+    } enable-intrinsics ;
  
  : enable-float-intrinsics ( -- )
      {
@@ -94,7 +97,7 @@ IN: compiler.cfg.intrinsics
          alien.accessors:set-alien-float
          alien.accessors:alien-double
          alien.accessors:set-alien-double
-    } [ t "intrinsic" set-word-prop ] each ;
+    } enable-intrinsics ;
  
  : enable-fsqrt ( -- )
      \ math.libm:fsqrt t "intrinsic" set-word-prop ;
@@ -103,10 +106,16 @@ IN: compiler.cfg.intrinsics
      {
          math.floats.private:float-min
          math.floats.private:float-max
-    } [ t "intrinsic" set-word-prop ] each ;
+    } enable-intrinsics ;
+
+: enable-min/max ( -- )
+    {
+        math.integers.private:fixnum-min
+        math.integers.private:fixnum-max
+    } enable-intrinsics ;
  
  : enable-fixnum-log2 ( -- )
-    \ math.integers.private:fixnum-log2 t "intrinsic" set-word-prop ;
+    { math.integers.private:fixnum-log2 } enable-intrinsics ;
  
  : emit-intrinsic ( node word -- )
      {
@@ -130,6 +139,8 @@ IN: compiler.cfg.intrinsics
          { \ math.private:fixnum>= [ drop cc>= emit-fixnum-comparison ] }
          { \ math.private:fixnum> [ drop cc> emit-fixnum-comparison ] }
          { \ kernel:eq? [ drop cc= emit-fixnum-comparison ] }
+        { \ math.integers.private:fixnum-min [ drop [ ^^min ] emit-fixnum-op ] }
+        { \ math.integers.private:fixnum-max [ drop [ ^^max ] emit-fixnum-op ] }
          { \ math.private:bignum>fixnum [ drop emit-bignum>fixnum ] }
          { \ math.private:fixnum>bignum [ drop emit-fixnum>bignum ] }
          { \ math.private:float+ [ drop [ ^^add-float ] emit-float-op ] }
diff --git a/basis/compiler/cfg/two-operand/two-operand.factor b/basis/compiler/cfg/two-operand/two-operand.factor

index e8fc036020a8fa87d342807a1047f202334a9a33..15151ff9e6be7843ec6d64925e421a5953202dde 100644 (file)
--- a/basis/compiler/cfg/two-operand/two-operand.factor
+++ b/basis/compiler/cfg/two-operand/two-operand.factor
@@ -35,6 +35,8 @@ UNION: two-operand-insn
      ##shr-imm
      ##sar
      ##sar-imm
+    ##min
+    ##max
      ##fixnum-overflow
      ##add-float
      ##sub-float
diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor

index 7c95c9d0a87ebde00fd09f4cff210cb0d8c97c1d..c0f793a7dc67fb9c5072ade99c4a1df4ea8148c3 100755 (executable)
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@@ -149,6 +149,8 @@ M: ##shr     generate-insn dst/src1/src2 %shr     ;
  M: ##shr-imm generate-insn dst/src1/src2 %shr-imm ;
  M: ##sar     generate-insn dst/src1/src2 %sar     ;
  M: ##sar-imm generate-insn dst/src1/src2 %sar-imm ;
+M: ##min     generate-insn dst/src1/src2 %min     ;
+M: ##max     generate-insn dst/src1/src2 %max     ;
  M: ##not     generate-insn dst/src       %not     ;
  M: ##log2    generate-insn dst/src       %log2    ;
  
diff --git a/basis/compiler/tests/intrinsics.factor b/basis/compiler/tests/intrinsics.factor

index 6180e49befd0b5d67995b83400690156f0797ce9..23d26b0033094ba1f9ac9abc771288620e34bdcf 100644 (file)
--- a/basis/compiler/tests/intrinsics.factor
+++ b/basis/compiler/tests/intrinsics.factor
@@ -1,11 +1,10 @@
-USING: accessors arrays compiler.units kernel kernel.private math
-math.constants math.private sequences strings tools.test words
-continuations sequences.private hashtables.private byte-arrays
-system random layouts vectors
+USING: accessors arrays compiler.units kernel kernel.private
+math math.constants math.private math.integers.private sequences
+strings tools.test words continuations sequences.private
+hashtables.private byte-arrays system random layouts vectors
  sbufs strings.private slots.private alien math.order
  alien.accessors alien.c-types alien.syntax alien.strings
-namespaces libc io.encodings.ascii
-classes compiler ;
+namespaces libc io.encodings.ascii classes compiler ;
  IN: compiler.tests.intrinsics
  
  ! Make sure that intrinsic ops compile to correct code.
@@ -271,6 +270,15 @@ cell 8 = [
      [ 100000 swap array-nth ] compile-call
  ] unit-test
  
+[ 2 ] [ 2 4 [ fixnum-min ] compile-call ] unit-test
+[ 2 ] [ 4 2 [ fixnum-min ] compile-call ] unit-test
+[ 4 ] [ 2 4 [ fixnum-max ] compile-call ] unit-test
+[ 4 ] [ 4 2 [ fixnum-max ] compile-call ] unit-test
+[ -2 ] [ -2 -4 [ fixnum-max ] compile-call ] unit-test
+[ -2 ] [ -4 -2 [ fixnum-max ] compile-call ] unit-test
+[ -4 ] [ -2 -4 [ fixnum-min ] compile-call ] unit-test
+[ -4 ] [ -4 -2 [ fixnum-min ] compile-call ] unit-test
+
  ! 64-bit overflow
  cell 8 = [
      [ t ] [ 1 59 fixnum-shift dup [ fixnum+ ] compile-call 1 60 fixnum-shift = ] unit-test
diff --git a/basis/compiler/tree/propagation/known-words/known-words.factor b/basis/compiler/tree/propagation/known-words/known-words.factor

index efcf05d7bcd620bb9c028faf6decb246af4a9acb..69785c8c0ab886499ab02e47df50582684a0408e 100644 (file)
--- a/basis/compiler/tree/propagation/known-words/known-words.factor
+++ b/basis/compiler/tree/propagation/known-words/known-words.factor
@@ -79,11 +79,16 @@ IN: compiler.tree.propagation.known-words
      ] unless ;
  
  : ensure-math-class ( class must-be -- class' )
-    [ class<= ] 2keep ? ;
+    [ class<= ] most ;
  
  : number-valued ( class interval -- class' interval' )
      [ number ensure-math-class ] dip ;
  
+: fixnum-valued ( class interval -- class' interval' )
+    over null-class? [
+        [ drop fixnum ] dip
+    ] unless ;
+
  : integer-valued ( class interval -- class' interval' )
      [ integer ensure-math-class ] dip ;
  
@@ -304,7 +309,15 @@ flog fpow fsqrt facosh fasinh fatanh } [
      { float } "default-output-classes" set-word-prop
  ] each
  
-{ float-min float-max } [
-    [ { float float } "input-classes" set-word-prop ]
-    [ { float } "default-output-classes" set-word-prop ] bi
-] each
+! Find a less repetitive way of doing this
+\ float-min { float float } "input-classes" set-word-prop
+\ float-min [ interval-min ] [ float-valued ] binary-op
+
+\ float-max { float float } "input-classes" set-word-prop
+\ float-max [ interval-max ] [ float-valued ] binary-op
+
+\ fixnum-min { fixnum fixnum } "input-classes" set-word-prop
+\ fixnum-min [ interval-min ] [ fixnum-valued ] binary-op
+
+\ fixnum-max { fixnum fixnum } "input-classes" set-word-prop
+\ fixnum-max [ interval-max ] [ fixnum-valued ] binary-op
diff --git a/basis/compiler/tree/propagation/transforms/transforms.factor b/basis/compiler/tree/propagation/transforms/transforms.factor

index d0362b3222aab08b3cdb11535e41f5684ad50284..9d0e5c89990398c24c275f734ff82896a6e496e2 100644 (file)
--- a/basis/compiler/tree/propagation/transforms/transforms.factor
+++ b/basis/compiler/tree/propagation/transforms/transforms.factor
@@ -1,11 +1,12 @@
  ! Copyright (C) 2008, 2009 Slava Pestov, Daniel Ehrenberg.
  ! See http://factorcode.org/license.txt for BSD license.
-USING: kernel sequences words fry generic accessors classes.tuple
-classes classes.algebra definitions stack-checker.state quotations
-classes.tuple.private math math.partial-dispatch math.private
-math.intervals math.floats.private layouts math.order vectors hashtables
-combinators effects generalizations assocs sets
-combinators.short-circuit sequences.private locals
+USING: kernel sequences words fry generic accessors
+classes.tuple classes classes.algebra definitions
+stack-checker.state quotations classes.tuple.private math
+math.partial-dispatch math.private math.intervals
+math.floats.private math.integers.private layouts math.order
+vectors hashtables combinators effects generalizations assocs
+sets combinators.short-circuit sequences.private locals
  stack-checker namespaces compiler.tree.propagation.info ;
  IN: compiler.tree.propagation.transforms
  
@@ -79,15 +80,25 @@ IN: compiler.tree.propagation.transforms
      ] [ f ] if
  ] "custom-inlining" set-word-prop
  
-{
-    { min [ float-min ] }
-    { max [ float-max ] }
-} [
-    '[
-        in-d>> first2 [ value-info class>> float class<= ] both?
-        [ _ ] [ f ] if
-    ] "custom-inlining" set-word-prop
-] assoc-each
+! Integrate this with generic arithmetic optimization instead?
+: both-inputs? ( #call class -- ? )
+    [ in-d>> first2 ] dip '[ value-info class>> _ class<= ] both? ;
+
+\ min [
+    {
+        { [ dup fixnum both-inputs? ] [ [ fixnum-min ] ] }
+        { [ dup float both-inputs? ] [ [ float-min ] ] }
+        [ f ]
+    } cond nip
+] "custom-inlining" set-word-prop
+
+\ max [
+    {
+        { [ dup fixnum both-inputs? ] [ [ fixnum-max ] ] }
+        { [ dup float both-inputs? ] [ [ float-max ] ] }
+        [ f ]
+    } cond nip
+] "custom-inlining" set-word-prop
  
  ! Generate more efficient code for common idiom
  \ clone [
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor

index 41cbd3014690031e2545a65093a9fa2f1b1b1752..fc972229e80abd73df583455f625255c023b1117 100644 (file)
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -96,6 +96,8 @@ HOOK: %shr     cpu ( dst src1 src2 -- )
  HOOK: %shr-imm cpu ( dst src1 src2 -- )
  HOOK: %sar     cpu ( dst src1 src2 -- )
  HOOK: %sar-imm cpu ( dst src1 src2 -- )
+HOOK: %min     cpu ( dst src1 src2 -- )
+HOOK: %max     cpu ( dst src1 src2 -- )
  HOOK: %not     cpu ( dst src -- )
  HOOK: %log2    cpu ( dst src -- )
  
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor

index 12414c3f94fe6c499a6dc6280abf735801536ea5..da7b89de0b4891e4d62be38c274110e40d75ab8b 100644 (file)
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -123,6 +123,10 @@ M: x86 %xor-imm nip XOR ;
  M: x86 %shl-imm nip SHL ;
  M: x86 %shr-imm nip SHR ;
  M: x86 %sar-imm nip SAR ;
+
+M: x86 %min     nip [ CMP ] [ CMOVG ] 2bi ;
+M: x86 %max     nip [ CMP ] [ CMOVL ] 2bi ;
+
  M: x86 %not     drop NOT ;
  M: x86 %log2    BSR ;
  
@@ -579,3 +583,5 @@ M: x86 small-enough? ( n -- ? )
      enable-float-intrinsics
      enable-fsqrt
      enable-float-min/max ;
+
+enable-min/max
diff --git a/basis/math/intervals/intervals-tests.factor b/basis/math/intervals/intervals-tests.factor

index 4e44fc1208c5227c634e207a51451e85604400ca..1ee4e1e100f6c7285edb9a7f2ace547bdd95c0af 100644 (file)
--- a/basis/math/intervals/intervals-tests.factor
+++ b/basis/math/intervals/intervals-tests.factor
@@ -235,6 +235,10 @@ IN: math.intervals.tests
      interval-contains?
  ] unit-test
  
+[ t ] [ full-interval 10 10 [a,b] interval-max 10 1/0. [a,b] = ] unit-test
+
+[ t ] [ full-interval 10 10 [a,b] interval-min -1/0. 10 [a,b] = ] unit-test
+
  [ t ] [ 1 100 [a,b] -1 1 [a,b] interval/i [-inf,inf] = ] unit-test
  
  ! Accuracy of interval-mod
diff --git a/basis/math/intervals/intervals.factor b/basis/math/intervals/intervals.factor

index 99997ab8cb0bc9798e87d6df68a9fb6165a64162..05f9906bb9d6602d2aa6e1862ff9d2315ae54e8c 100755 (executable)
--- a/basis/math/intervals/intervals.factor
+++ b/basis/math/intervals/intervals.factor
@@ -7,7 +7,7 @@ IN: math.intervals
  
  SYMBOL: empty-interval
  
-SYMBOL: full-interval
+SINGLETON: full-interval
  
  TUPLE: interval { from read-only } { to read-only } ;
  
@@ -238,12 +238,24 @@ MEMO: array-capacity-interval ( -- interval )
      ] do-empty-interval ;
  
  : interval-max ( i1 i2 -- i3 )
-    #! Inaccurate; could be tighter
-    [ [ interval-closure ] bi@ [ max ] interval-op nan-not-ok ] do-empty-interval ;
+    {
+        { [ over empty-interval eq? ] [ drop ] }
+        { [ dup empty-interval eq? ] [ nip ] }
+        { [ 2dup [ full-interval eq? ] both? ] [ drop ] }
+        { [ over full-interval eq? ] [ nip from>> first [a,inf] ] }
+        { [ dup full-interval eq? ] [ drop from>> first [a,inf] ] }
+        [ [ interval-closure ] bi@ [ max ] interval-op nan-not-ok ]
+    } cond ;
  
  : interval-min ( i1 i2 -- i3 )
-    #! Inaccurate; could be tighter
-    [ [ interval-closure ] bi@ [ min ] interval-op nan-not-ok ] do-empty-interval ;
+    {
+        { [ over empty-interval eq? ] [ drop ] }
+        { [ dup empty-interval eq? ] [ nip ] }
+        { [ 2dup [ full-interval eq? ] both? ] [ drop ] }
+        { [ over full-interval eq? ] [ nip to>> first [-inf,a] ] }
+        { [ dup full-interval eq? ] [ drop to>> first [-inf,a] ] }
+        [ [ interval-closure ] bi@ [ min ] interval-op nan-not-ok ]
+    } cond ;
  
  : interval-interior ( i1 -- i2 )
      dup special-interval? [
diff --git a/core/math/floats/floats.factor b/core/math/floats/floats.factor

index 02dbd6ea84ccd82c3d122c55447c5d8148b57c08..53c3fe543e0d067b546e8bad0b852dba53671323 100644 (file)
--- a/core/math/floats/floats.factor
+++ b/core/math/floats/floats.factor
@@ -3,8 +3,8 @@
  USING: kernel math math.private ;
  IN: math.floats.private
  
-: float-min ( x y -- z ) [ float< ] 2keep ? ;
-: float-max ( x y -- z ) [ float> ] 2keep ? ;
+: float-min ( x y -- z ) [ float< ] most ; foldable
+: float-max ( x y -- z ) [ float> ] most ; foldable
  
  M: fixnum >float fixnum>float ; inline
  M: bignum >float bignum>float ; inline
diff --git a/core/math/integers/integers.factor b/core/math/integers/integers.factor

index 75abd8087e3cccf0edc9fd22af5fb2468077b1cb..ed25e3bfa6b5030f21000fd2bbb66474fb6e6520 100644 (file)
--- a/core/math/integers/integers.factor
+++ b/core/math/integers/integers.factor
@@ -1,10 +1,13 @@
-! Copyright (C) 2004, 2008 Slava Pestov.
+! Copyright (C) 2004, 2009 Slava Pestov.
  ! Copyright (C) 2008, Doug Coleman.
  ! See http://factorcode.org/license.txt for BSD license.
  USING: kernel kernel.private sequences
  sequences.private math math.private combinators ;
  IN: math.integers.private
  
+: fixnum-min ( x y -- z ) [ fixnum< ] most ; foldable
+: fixnum-max ( x y -- z ) [ fixnum> ] most ; foldable
+
  M: integer numerator ; inline
  M: integer denominator drop 1 ; inline
author	Slava Pestov <slava@slava-pestovs-macbook-pro.local>
	Sat, 29 Aug 2009 00:02:59 +0000 (19:02 -0500)
committer	Slava Pestov <slava@slava-pestovs-macbook-pro.local>
	Sat, 29 Aug 2009 00:02:59 +0000 (19:02 -0500)
basis/compiler/cfg/hats/hats.factor		patch \| blob \| history
basis/compiler/cfg/instructions/instructions.factor		patch \| blob \| history
basis/compiler/cfg/intrinsics/intrinsics.factor		patch \| blob \| history
basis/compiler/cfg/two-operand/two-operand.factor		patch \| blob \| history
basis/compiler/codegen/codegen.factor		patch \| blob \| history
basis/compiler/tests/intrinsics.factor		patch \| blob \| history
basis/compiler/tree/propagation/known-words/known-words.factor		patch \| blob \| history
basis/compiler/tree/propagation/transforms/transforms.factor		patch \| blob \| history
basis/cpu/architecture/architecture.factor		patch \| blob \| history
basis/cpu/x86/x86.factor		patch \| blob \| history
basis/math/intervals/intervals-tests.factor		patch \| blob \| history
basis/math/intervals/intervals.factor		patch \| blob \| history
core/math/floats/floats.factor		patch \| blob \| history
core/math/integers/integers.factor		patch \| blob \| history