<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleVersion</key>
- <string>0.93</string>
+ <string>0.94</string>
<key>NSHumanReadableCopyright</key>
<string>Copyright © 2003-2010 Factor developers</string>
<key>NSServices</key>
AR = ar
LD = ld
- VERSION = 0.93
+ VERSION = 0.94
BUNDLE = Factor.app
LIBPATH = -L/usr/X11R6/lib
[ 4 ] [ 5.5 { 1 2 3 4 5 6 7 8 } [ <=> ] with search drop ] unit-test
[ 10 ] [ 10 20 iota [ <=> ] with search drop ] unit-test
-[ t ] [ "hello" { "alligator" "cat" "fish" "hello" "ikarus" "java" } sorted-member? ] unit-test
+[ 0 ] [ "alligator" { "alligator" "cat" "fish" "hello" "ikarus" "java" } sorted-index ] unit-test
[ 3 ] [ "hey" { "alligator" "cat" "fish" "hello" "ikarus" "java" } sorted-index ] unit-test
+[ 5 ] [ "java" { "alligator" "cat" "fish" "hello" "ikarus" "java" } sorted-index ] unit-test
+[ t ] [ "hello" { "alligator" "cat" "fish" "hello" "ikarus" "java" } sorted-member? ] unit-test
[ f ] [ "hello" { "alligator" "cat" "fish" "ikarus" "java" } sorted-member? ] unit-test
[ f ] [ "zebra" { "alligator" "cat" "fish" "ikarus" "java" } sorted-member? ] unit-test
-! Copyright (C) 2008, 2009 Slava Pestov.
+! Copyright (C) 2008, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel sequences sequences.private accessors math
-math.order combinators hints arrays ;
+USING: accessors arrays combinators hints kernel locals math
+math.order sequences ;
IN: binary-search
<PRIVATE
-: midpoint ( seq -- elt )
- [ midpoint@ ] keep nth-unsafe ; inline
+:: (search) ( seq from to quot: ( elt -- <=> ) -- i elt )
+ from to + 2/ :> midpoint@
+ midpoint@ seq nth :> midpoint
-: decide ( quot seq -- quot seq <=> )
- [ midpoint swap call ] 2keep rot ; inline
-
-: finish ( quot slice -- i elt )
- [ [ from>> ] [ midpoint@ ] bi + ] [ seq>> ] bi
- [ drop ] [ dup ] [ ] tri* nth ; inline
-
-DEFER: (search)
-
-: keep-searching ( seq quot -- slice )
- [ dup midpoint@ ] dip call collapse-slice slice boa (search) ; inline
-
-: (search) ( ... quot: ( ... elt -- ... <=> ) seq -- ... i elt )
- dup length 1 <= [
- finish
+ to from - 1 <= [
+ midpoint@ midpoint
] [
- decide {
- { +eq+ [ finish ] }
- { +lt+ [ [ (head) ] keep-searching ] }
- { +gt+ [ [ (tail) ] keep-searching ] }
+ midpoint quot call {
+ { +eq+ [ midpoint@ midpoint ] }
+ { +lt+ [ seq from midpoint@ quot (search) ] }
+ { +gt+ [ seq midpoint@ to quot (search) ] }
} case
] if ; inline recursive
PRIVATE>
-: search ( seq quot -- i elt )
- over empty? [ 2drop f f ] [ swap <flat-slice> (search) ] if ;
+: search ( seq quot: ( elt -- <=> ) -- i elt )
+ over empty? [ 2drop f f ] [ [ 0 over length ] dip (search) ] if ;
inline
: natural-search ( obj seq -- i elt )
! Regression
[ 4 [ + ] ] [ 2 2 [ [ + ] [ call ] keep ] compile-call ] unit-test
-! Regression
-USE: sorting
-USE: binary-search
-USE: binary-search.private
-
-: old-binsearch ( elt quot: ( ..a -- ..b ) seq -- elt quot i )
- dup length 1 <= [
- from>>
- ] [
- [ midpoint swap call ] 3keep [ rot ] dip swap dup zero?
- [ drop dup from>> swap midpoint@ + ]
- [ drop dup midpoint@ head-slice old-binsearch ] if
- ] if ; inline recursive
-
-[ 10 ] [
- 10 20 iota <flat-slice>
- [ [ - ] swap old-binsearch ] compile-call 2nip
-] unit-test
-
! Regression
: empty-compound ( -- ) ;
{ $description "Prepares to take the slice of a slice by adjusting the start and end indices accordingly, and replacing the slice with its underlying sequence." }
;
-HELP: <flat-slice>
-{ $values { "seq" sequence } { "slice" slice } }
-{ $description "Outputs a slice with the same elements as " { $snippet "seq" } ", and " { $snippet "from" } " equal to 0 and " { $snippet "to" } " equal to the length of " { $snippet "seq" } "." }
-{ $notes "Some words create slices then proceed to read the " { $snippet "to" } " and " { $snippet "from" } " slots of the slice. To behave predictably when they are themselves given a slice as input, they apply this word first to get a canonical slice." } ;
-
HELP: <slice>
{ $values { "from" "a non-negative integer" } { "to" "a non-negative integer" } { "seq" sequence } { "slice" slice } }
{ $description "Outputs a new virtual sequence sharing storage with the subrange of elements in " { $snippet "seq" } " with indices starting from and including " { $snippet "m" } ", and up to but not including " { $snippet "n" } "." }
{ $errors "Throws an error if " { $snippet "m" } " or " { $snippet "n" } " is out of bounds." }
-{ $notes "Taking the slice of a slice outputs a slice of the underlying sequence of the original slice. Keep this in mind when writing code which depends on the values of " { $snippet "from" } " and " { $snippet "to" } " being equal to the inputs to this word. The " { $link <flat-slice> } " word might be helpful in such situations." } ;
+{ $notes "Taking the slice of a slice outputs a slice of the underlying sequence, instead of a slice of a slice. This means that you cannot assume that the " { $snippet "from" } " and " { $snippet "to" } " slots of the resulting slice will be equal to the values you passed to " { $link <slice> } "." } ;
{ <slice> subseq } related-words
{ $subsections rest-slice but-last-slice }
"Taking a sequence apart into a head and a tail:"
{ $subsections unclip-slice unclip-last-slice cut-slice }
-"A utility for words which use slices as iterators:"
-{ $subsections <flat-slice> }
"Replacing slices with new elements:"
{ $subsections replace-slice } ;
: unclip-last-slice ( seq -- butlast-slice last )
[ but-last-slice ] [ last ] bi ; inline
-: <flat-slice> ( seq -- slice )
- dup slice? [ { } like ] when
- [ drop 0 ] [ length ] [ ] tri <slice> ;
- inline
-
<PRIVATE
: (trim-head) ( seq quot -- seq n )
-! Copyright (C) 2008 Slava Pestov.
+! Copyright (C) 2008, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: binary-search compiler.units kernel math.primes math.ranges
-memoize prettyprint sequences ;
+USING: binary-search kernel math.primes math.ranges memoize
+prettyprint sequences ;
IN: benchmark.binary-search
-[
- MEMO: primes-under-million ( -- seq ) 1000000 primes-upto ;
-] with-compilation-unit
+MEMO: primes-under-million ( -- seq ) 1000000 primes-upto ;
! Force computation of the primes list before benchmarking the binary search
primes-under-million drop
--- /dev/null
+Dmitry Shubin
--- /dev/null
+! Copyright (C) 2010 Dmitry Shubin.
+! See http://factorcode.org/license.txt for BSD license.
+USING: boyer-moore.private help.markup help.syntax kernel sequences ;
+IN: boyer-moore
+
+HELP: <boyer-moore>
+{ $values
+ { "pat" sequence } { "bm" boyer-moore }
+}
+{ $description
+ "Given a pattern performs pattern preprocessing and returns "
+ "results as an (opaque) object that is reusable across "
+ "searches in different sequences via " { $link search-from }
+ " generic word."
+} ;
+
+HELP: search-from
+{ $values
+ { "seq" sequence }
+ { "from" "a non-negative integer" }
+ { "obj" object }
+ { "i/f" "the index of first match or " { $link f } }
+}
+{ $description "Performs an attempt to find the first "
+ "occurence of pattern in " { $snippet "seq" }
+ " starting from " { $snippet "from" } " using "
+ "Boyer-Moore search algorithm. Output is the index "
+ "if the attempt was succeessful and " { $link f }
+ " otherwise."
+} ;
+
+HELP: search
+{ $values
+ { "seq" sequence }
+ { "obj" object }
+ { "i/f" "the index of first match or " { $link f } }
+}
+{ $description "A simpler variant of " { $link search-from }
+ " that starts searching from the beginning of the sequence."
+} ;
+
+ARTICLE: "boyer-moore" "The Boyer-Moore algorithm"
+{ $heading "Summary" }
+"The " { $vocab-link "boyer-moore" } " vocabulary "
+"implements a Boyer-Moore string search algorithm with "
+"so-called 'strong good suffix shift rule'. Since algorithm is "
+"alphabet-independent it is applicable to searching in any "
+"collection that implements " { $links "sequence-protocol" } "."
+
+{ $heading "Complexity" }
+"Let " { $snippet "n" } " and " { $snippet "m" } " be lengths "
+"of the sequences being searched " { $emphasis "in" } " and "
+{ $emphasis "for" } " respectively. Then searching runs in "
+{ $snippet "O(n)" } " time in its worst case using additional "
+{ $snippet "O(m)" } " space. The preprocessing phase runs in "
+{ $snippet "O(m)" } " time."
+;
+
+ABOUT: "boyer-moore"
--- /dev/null
+! Copyright (C) 2010 Dmitry Shubin.
+! See http://factorcode.org/license.txt for BSD license.
+USING: tools.test boyer-moore ;
+IN: boyer-moore.tests
+
+[ 0 ] [ "qwerty" "" search ] unit-test
+[ 0 ] [ "" "" search ] unit-test
+[ f ] [ "qw" "qwerty" search ] unit-test
+[ 3 ] [ "qwerty" "r" search ] unit-test
+[ 8 ] [ "qwerasdfqwer" 2 "qwe" search-from ] unit-test
--- /dev/null
+! Copyright (C) 2010 Dmitry Shubin.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs kernel locals math math.order
+math.ranges sequences sequences.private z-algorithm ;
+IN: boyer-moore
+
+<PRIVATE
+
+:: (normal-suffixes) ( i zs ss -- )
+ i zs nth-unsafe ss
+ [ [ i ] unless* ] change-nth-unsafe ; inline
+
+: normal-suffixes ( zs -- ss )
+ [ length [ f <array> ] [ [1,b) ] bi ] keep pick
+ [ (normal-suffixes) ] 2curry each ; inline
+
+:: (partial-suffixes) ( len old elt i -- len old/new old )
+ len elt i 1 + = [ len elt - ] [ old ] if old ; inline
+
+: partial-suffixes ( zs -- ss )
+ [ length dup ] [ <reversed> ] bi
+ [ (partial-suffixes) ] map-index 2nip ; inline
+
+: <gs-table> ( seq -- table )
+ z-values [ partial-suffixes ] [ normal-suffixes ] bi
+ [ [ nip ] when* ] 2map reverse! ; inline
+
+: insert-bc-shift ( table elt len i -- table )
+ 1 + swap - swap pick 2dup key?
+ [ 3drop ] [ set-at ] if ; inline
+
+: <bc-table> ( seq -- table )
+ H{ } clone swap [ length ] keep
+ [ insert-bc-shift ] with each-index ; inline
+
+TUPLE: boyer-moore pattern bc-table gs-table ;
+
+: gs-shift ( i c bm -- s ) nip gs-table>> nth-unsafe ; inline
+
+: bc-shift ( i c bm -- s ) bc-table>> at dup 1 ? + ; inline
+
+: do-shift ( pos i c bm -- newpos )
+ [ gs-shift ] [ bc-shift ] bi-curry 2bi max + ; inline
+
+: match? ( i1 s1 i2 s2 -- ? ) [ nth-unsafe ] 2bi@ = ; inline
+
+:: mismatch? ( s1 s2 pos len -- i/f )
+ len 1 - [ [ pos + s1 ] keep s2 match? not ]
+ find-last-integer ; inline
+
+:: (search-from) ( seq from bm -- i/f )
+ bm pattern>> :> pat
+ pat length :> plen
+ seq length plen - :> lim
+ from
+ [
+ dup lim <=
+ [
+ seq pat pick plen mismatch?
+ [ 2dup + seq nth-unsafe bm do-shift t ] [ f ] if*
+ ] [ drop f f ] if
+ ] loop ; inline
+
+PRIVATE>
+
+: <boyer-moore> ( pat -- bm )
+ dup <reversed> [ <bc-table> ] [ <gs-table> ] bi
+ boyer-moore boa ;
+
+GENERIC: search-from ( seq from obj -- i/f )
+
+M: sequence search-from
+ dup length zero?
+ [ 3drop 0 ] [ <boyer-moore> (search-from) ] if ;
+
+M: boyer-moore search-from (search-from) ;
+
+: search ( seq obj -- i/f ) [ 0 ] dip search-from ;
--- /dev/null
+Boyer-Moore string search algorithm
--- /dev/null
+algorithms
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda_runtime.h>
+
+static const int LOG_BANK_COUNT = 4;
+
+static inline __device__ __host__ unsigned shared_offset(unsigned i)
+{
+ return i + (i >> LOG_BANK_COUNT);
+}
+
+static inline __device__ __host__ unsigned offset_a(unsigned offset, unsigned i)
+{
+ return shared_offset(offset * (2*i + 1) - 1);
+}
+
+static inline __device__ __host__ unsigned offset_b(unsigned offset, unsigned i)
+{
+ return shared_offset(offset * (2*i + 2) - 1);
+}
+
+static inline __device__ __host__ unsigned lpot(unsigned x)
+{
+ --x; x |= x>>1; x|=x>>2; x|=x>>4; x|=x>>8; x|=x>>16; return ++x;
+}
+
+template<typename T>
+__global__ void prefix_sum_block(T *in, T *out, unsigned n)
+{
+ extern __shared__ T temp[];
+
+ int idx = threadIdx.x;
+ int blocksize = blockDim.x;
+
+ temp[shared_offset(idx )] = (idx < n) ? in[idx ] : 0;
+ temp[shared_offset(idx + blocksize)] = (idx + blocksize < n) ? in[idx + blocksize] : 0;
+
+ int offset, d;
+ for (offset = 1, d = blocksize; d > 0; d >>= 1, offset <<= 1) {
+ __syncthreads();
+ if (idx < d) {
+ unsigned a = offset_a(offset, idx), b = offset_b(offset, idx);
+ temp[b] += temp[a];
+ }
+ }
+
+ if (idx == 0) temp[shared_offset(blocksize*2 - 1)] = 0;
+
+ for (d = 1; d <= blocksize; d <<= 1) {
+ offset >>= 1;
+ __syncthreads();
+
+ if (idx < d) {
+ unsigned a = offset_a(offset, idx), b = offset_b(offset, idx);
+ unsigned t = temp[a];
+ temp[a] = temp[b];
+ temp[b] += t;
+ }
+ }
+ __syncthreads();
+
+ if (idx < n) out[idx ] = temp[shared_offset(idx )];
+ if (idx + blocksize < n) out[idx + blocksize] = temp[shared_offset(idx + blocksize)];
+}
+
+template<typename T>
+void prefix_sum(T *in, T *out, unsigned n)
+{
+ char *device_values;
+ unsigned n_lpot = lpot(n);
+ size_t n_pitch;
+
+ cudaError_t error = cudaMallocPitch((void**)&device_values, &n_pitch, sizeof(T)*n, 2);
+ if (error != 0) {
+ printf("error %u allocating width %lu height %u\n", error, sizeof(T)*n, 2);
+ exit(1);
+ }
+
+ cudaMemcpy(device_values, in, sizeof(T)*n, cudaMemcpyHostToDevice);
+
+ prefix_sum_block<<<1, n_lpot/2, shared_offset(n_lpot)*sizeof(T)>>>
+ ((T*)device_values, (T*)(device_values + n_pitch), n);
+
+ cudaMemcpy(out, device_values + n_pitch, sizeof(T)*n, cudaMemcpyDeviceToHost);
+ cudaFree(device_values);
+}
+
+int main()
+{
+ sranddev();
+
+ static unsigned in_values[1024], out_values[1024];
+
+ for (int i = 0; i < 1024; ++i)
+ in_values[i] = rand() >> 21;
+
+ prefix_sum(in_values, out_values, 1024);
+
+ for (int i = 0; i < 1024; ++i)
+ printf("%5d => %5d\n", in_values[i], out_values[i]);
+
+ return 0;
+}
--- /dev/null
+ .version 1.4
+ .target sm_10, map_f64_to_f32
+ // compiled with /usr/local/cuda/bin/../open64/lib//be
+ // nvopencc 3.0 built on 2010-03-11
+
+ //-----------------------------------------------------------
+ // Compiling /tmp/tmpxft_00000236_00000000-7_prefix-sum.cpp3.i (/var/folders/K6/K6oI14wZ2RWhSE+BYqTjA++++TI/-Tmp-/ccBI#.0ATpGM)
+ //-----------------------------------------------------------
+
+ //-----------------------------------------------------------
+ // Options:
+ //-----------------------------------------------------------
+ // Target:ptx, ISA:sm_10, Endian:little, Pointer Size:32
+ // -O3 (Optimization level)
+ // -g0 (Debug level)
+ // -m2 (Report advisories)
+ //-----------------------------------------------------------
+
+ .file 1 "<command-line>"
+ .file 2 "/tmp/tmpxft_00000236_00000000-6_prefix-sum.cudafe2.gpu"
+ .file 3 "/usr/lib/gcc/i686-apple-darwin10/4.2.1/include/stddef.h"
+ .file 4 "/usr/local/cuda/bin/../include/crt/device_runtime.h"
+ .file 5 "/usr/local/cuda/bin/../include/host_defines.h"
+ .file 6 "/usr/local/cuda/bin/../include/builtin_types.h"
+ .file 7 "/usr/local/cuda/bin/../include/device_types.h"
+ .file 8 "/usr/local/cuda/bin/../include/driver_types.h"
+ .file 9 "/usr/local/cuda/bin/../include/texture_types.h"
+ .file 10 "/usr/local/cuda/bin/../include/vector_types.h"
+ .file 11 "/usr/local/cuda/bin/../include/device_launch_parameters.h"
+ .file 12 "/usr/local/cuda/bin/../include/crt/storage_class.h"
+ .file 13 "/usr/include/i386/_types.h"
+ .file 14 "/usr/include/time.h"
+ .file 15 "prefix-sum.cu"
+ .file 16 "/usr/local/cuda/bin/../include/common_functions.h"
+ .file 17 "/usr/local/cuda/bin/../include/crt/func_macro.h"
+ .file 18 "/usr/local/cuda/bin/../include/math_functions.h"
+ .file 19 "/usr/local/cuda/bin/../include/device_functions.h"
+ .file 20 "/usr/local/cuda/bin/../include/math_constants.h"
+ .file 21 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h"
+ .file 22 "/usr/local/cuda/bin/../include/sm_12_atomic_functions.h"
+ .file 23 "/usr/local/cuda/bin/../include/sm_13_double_functions.h"
+ .file 24 "/usr/local/cuda/bin/../include/common_types.h"
+ .file 25 "/usr/local/cuda/bin/../include/sm_20_atomic_functions.h"
+ .file 26 "/usr/local/cuda/bin/../include/sm_20_intrinsics.h"
+ .file 27 "/usr/local/cuda/bin/../include/texture_fetch_functions.h"
+ .file 28 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h"
+
+ .extern .shared .align 4 .b8 temp[];
+
+ .entry _Z16prefix_sum_blockIjEvPT_S1_j (
+ .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in,
+ .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out,
+ .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n)
+ {
+ .reg .u32 %r<81>;
+ .reg .pred %p<11>;
+ .loc 15 28 0
+$LBB1__Z16prefix_sum_blockIjEvPT_S1_j:
+ ld.param.u32 %r1, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n];
+ cvt.s32.u16 %r2, %tid.x;
+ setp.lt.u32 %p1, %r2, %r1;
+ @!%p1 bra $Lt_0_7938;
+ .loc 15 35 0
+ ld.param.u32 %r3, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in];
+ mul24.lo.u32 %r4, %r2, 4;
+ add.u32 %r5, %r3, %r4;
+ ld.global.u32 %r6, [%r5+0];
+ bra.uni $Lt_0_7682;
+$Lt_0_7938:
+ mov.u32 %r6, 0;
+$Lt_0_7682:
+ mov.u32 %r7, temp;
+ shr.u32 %r8, %r2, 4;
+ add.u32 %r9, %r2, %r8;
+ mul.lo.u32 %r10, %r9, 4;
+ add.u32 %r11, %r10, %r7;
+ st.shared.u32 [%r11+0], %r6;
+ cvt.s32.u16 %r12, %ntid.x;
+ add.s32 %r13, %r12, %r2;
+ .loc 15 28 0
+ ld.param.u32 %r1, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n];
+ .loc 15 35 0
+ setp.lt.u32 %p2, %r13, %r1;
+ @!%p2 bra $Lt_0_8450;
+ .loc 15 36 0
+ ld.param.u32 %r14, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in];
+ mul.lo.u32 %r15, %r13, 4;
+ add.u32 %r16, %r14, %r15;
+ ld.global.u32 %r17, [%r16+0];
+ bra.uni $Lt_0_8194;
+$Lt_0_8450:
+ mov.u32 %r17, 0;
+$Lt_0_8194:
+ shr.u32 %r18, %r13, 4;
+ add.u32 %r19, %r13, %r18;
+ mul.lo.u32 %r20, %r19, 4;
+ add.u32 %r21, %r20, %r7;
+ st.shared.u32 [%r21+0], %r17;
+ .loc 15 39 0
+ mov.s32 %r22, %r12;
+ mov.u32 %r23, 0;
+ setp.le.s32 %p3, %r12, %r23;
+ mov.s32 %r24, 1;
+ @%p3 bra $Lt_0_13314;
+$Lt_0_9218:
+ //<loop> Loop body line 39, nesting depth: 1, estimated iterations: unknown
+ .loc 15 40 0
+ bar.sync 0;
+ setp.le.s32 %p4, %r22, %r2;
+ @%p4 bra $Lt_0_9474;
+ //<loop> Part of loop body line 39, head labeled $Lt_0_9218
+ .loc 15 43 0
+ mul24.lo.u32 %r25, %r2, 2;
+ add.u32 %r26, %r25, 1;
+ add.u32 %r27, %r25, 2;
+ mul.lo.u32 %r28, %r24, %r26;
+ mul.lo.u32 %r29, %r24, %r27;
+ sub.u32 %r30, %r29, 1;
+ shr.u32 %r31, %r30, 4;
+ add.u32 %r32, %r29, %r31;
+ mul.lo.u32 %r33, %r32, 4;
+ add.u32 %r34, %r33, %r7;
+ ld.shared.u32 %r35, [%r34+-4];
+ sub.u32 %r36, %r28, 1;
+ shr.u32 %r37, %r36, 4;
+ add.u32 %r38, %r28, %r37;
+ mul.lo.u32 %r39, %r38, 4;
+ add.u32 %r40, %r7, %r39;
+ ld.shared.u32 %r41, [%r40+-4];
+ add.u32 %r42, %r35, %r41;
+ st.shared.u32 [%r34+-4], %r42;
+$Lt_0_9474:
+ //<loop> Part of loop body line 39, head labeled $Lt_0_9218
+ .loc 15 39 0
+ shr.s32 %r22, %r22, 1;
+ shl.b32 %r24, %r24, 1;
+ mov.u32 %r43, 0;
+ setp.gt.s32 %p5, %r22, %r43;
+ @%p5 bra $Lt_0_9218;
+ bra.uni $Lt_0_8706;
+$Lt_0_13314:
+$Lt_0_8706:
+ mov.u32 %r44, 0;
+ setp.ne.s32 %p6, %r2, %r44;
+ @%p6 bra $Lt_0_10242;
+ .loc 15 47 0
+ mul24.lo.s32 %r45, %r12, 2;
+ mov.u32 %r46, 0;
+ sub.u32 %r47, %r45, 1;
+ shr.u32 %r48, %r47, 4;
+ add.u32 %r49, %r45, %r48;
+ mul.lo.u32 %r50, %r49, 4;
+ add.u32 %r51, %r7, %r50;
+ st.shared.u32 [%r51+-4], %r46;
+$Lt_0_10242:
+ mov.u32 %r52, 1;
+ setp.lt.s32 %p7, %r12, %r52;
+ @%p7 bra $Lt_0_10754;
+ mov.s32 %r22, 1;
+$Lt_0_11266:
+ //<loop> Loop body line 47, nesting depth: 1, estimated iterations: unknown
+ .loc 15 50 0
+ shr.s32 %r24, %r24, 1;
+ .loc 15 51 0
+ bar.sync 0;
+ setp.le.s32 %p8, %r22, %r2;
+ @%p8 bra $Lt_0_11522;
+ //<loop> Part of loop body line 47, head labeled $Lt_0_11266
+ .loc 15 55 0
+ mul24.lo.u32 %r53, %r2, 2;
+ add.u32 %r54, %r53, 1;
+ mul.lo.u32 %r55, %r24, %r54;
+ sub.u32 %r56, %r55, 1;
+ shr.u32 %r57, %r56, 4;
+ add.u32 %r58, %r55, %r57;
+ mul.lo.u32 %r59, %r58, 4;
+ add.u32 %r60, %r59, %r7;
+ ld.shared.u32 %r61, [%r60+-4];
+ .loc 15 56 0
+ add.u32 %r62, %r53, 2;
+ mul.lo.u32 %r63, %r24, %r62;
+ sub.u32 %r64, %r63, 1;
+ shr.u32 %r65, %r64, 4;
+ add.u32 %r66, %r63, %r65;
+ mul.lo.u32 %r67, %r66, 4;
+ add.u32 %r68, %r67, %r7;
+ ld.shared.u32 %r69, [%r68+-4];
+ st.shared.u32 [%r60+-4], %r69;
+ .loc 15 57 0
+ ld.shared.u32 %r70, [%r68+-4];
+ add.u32 %r71, %r70, %r61;
+ st.shared.u32 [%r68+-4], %r71;
+$Lt_0_11522:
+ //<loop> Part of loop body line 47, head labeled $Lt_0_11266
+ .loc 15 49 0
+ shl.b32 %r22, %r22, 1;
+ setp.le.s32 %p9, %r22, %r12;
+ @%p9 bra $Lt_0_11266;
+$Lt_0_10754:
+ .loc 15 60 0
+ bar.sync 0;
+ @!%p1 bra $Lt_0_12290;
+ .loc 15 62 0
+ ld.shared.u32 %r72, [%r11+0];
+ ld.param.u32 %r73, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out];
+ mul24.lo.u32 %r74, %r2, 4;
+ add.u32 %r75, %r73, %r74;
+ st.global.u32 [%r75+0], %r72;
+$Lt_0_12290:
+ @!%p2 bra $Lt_0_12802;
+ .loc 15 63 0
+ ld.shared.u32 %r76, [%r21+0];
+ ld.param.u32 %r77, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out];
+ mul.lo.u32 %r78, %r13, 4;
+ add.u32 %r79, %r77, %r78;
+ st.global.u32 [%r79+0], %r76;
+$Lt_0_12802:
+ .loc 15 64 0
+ exit;
+$LDWend__Z16prefix_sum_blockIjEvPT_S1_j:
+ } // _Z16prefix_sum_blockIjEvPT_S1_j
+
--- /dev/null
+! (c)2010 Joe Groff bsd license
+USING: accessors arrays combinators io kernel math math.parser
+roles sequences strings variants words ;
+FROM: roles => TUPLE: ;
+IN: cuda.ptx
+
+UNION: dim integer sequence ;
+UNION: ?integer POSTPONE: f integer ;
+UNION: ?string POSTPONE: f string ;
+
+VARIANT: ptx-type
+ .s8 .s16 .s32 .s64
+ .u8 .u16 .u32 .u64
+ .f16 .f32 .f64
+ .b8 .b16 .b32 .b64
+ .pred
+ .texref .samplerref .surfref
+ .v2: { { of ptx-type } }
+ .v4: { { of ptx-type } }
+ .struct: { { name string } } ;
+
+VARIANT: ptx-arch
+ sm_10 sm_11 sm_12 sm_13 sm_20 ;
+UNION: ?ptx-arch POSTPONE: f ptx-arch ;
+
+VARIANT: ptx-texmode
+ .texmode_unified .texmode_independent ;
+UNION: ?ptx-texmode POSTPONE: f ptx-texmode ;
+
+VARIANT: ptx-storage-space
+ .reg
+ .sreg
+ .const: { { bank ?integer } }
+ .global
+ .local
+ .param
+ .shared
+ .tex ;
+UNION: ?ptx-storage-space POSTPONE: f ptx-storage-space ;
+
+TUPLE: ptx-target
+ { arch ?ptx-arch }
+ { map_f64_to_f32? boolean }
+ { texmode ?ptx-texmode } ;
+
+TUPLE: ptx
+ { version string }
+ { target ptx-target }
+ body ;
+
+TUPLE: ptx-struct-definition
+ { name string }
+ members ;
+
+TUPLE: ptx-variable
+ { extern? boolean }
+ { visible? boolean }
+ { align ?integer }
+ { storage-space ptx-storage-space }
+ { type ptx-type }
+ { name string }
+ { parameter ?integer }
+ { dim dim }
+ { initializer ?string } ;
+
+TUPLE: ptx-predicate
+ { negated? boolean }
+ { variable string } ;
+UNION: ?ptx-predicate POSTPONE: f ptx-predicate ;
+
+TUPLE: ptx-instruction
+ { label ?string }
+ { predicate ?ptx-predicate } ;
+
+TUPLE: ptx-entry
+ { name string }
+ params
+ directives
+ body ;
+
+TUPLE: ptx-func < ptx-entry
+ { return ptx-variable } ;
+
+TUPLE: ptx-directive ;
+
+TUPLE: .file < ptx-directive
+ { info string } ;
+TUPLE: .loc < ptx-directive
+ { info string } ;
+TUPLE: .maxnctapersm < ptx-directive
+ { ncta integer } ;
+TUPLE: .minnctapersm < ptx-directive
+ { ncta integer } ;
+TUPLE: .maxnreg < ptx-directive
+ { n integer } ;
+TUPLE: .maxntid < ptx-directive
+ { dim dim } ;
+TUPLE: .pragma < ptx-directive
+ { pragma string } ;
+
+VARIANT: ptx-float-rounding-mode
+ .rn .rz .rm .rp .approx .full ;
+VARIANT: ptx-int-rounding-mode
+ .rni .rzi .rmi .rpi ;
+UNION: ?ptx-float-rounding-mode POSTPONE: f ptx-float-rounding-mode ;
+UNION: ?ptx-int-rounding-mode POSTPONE: f ptx-int-rounding-mode ;
+
+UNION: ptx-rounding-mode
+ ptx-float-rounding-mode ptx-int-rounding-mode ;
+UNION: ?ptx-rounding-mode POSTPONE: f ptx-rounding-mode ;
+
+TUPLE: ptx-typed-instruction < ptx-instruction
+ { type ptx-type }
+ { dest string } ;
+
+TUPLE: ptx-2op-instruction < ptx-typed-instruction
+ { a string } ;
+
+TUPLE: ptx-3op-instruction < ptx-typed-instruction
+ { a string }
+ { b string } ;
+
+TUPLE: ptx-4op-instruction < ptx-typed-instruction
+ { a string }
+ { b string }
+ { c string } ;
+
+TUPLE: ptx-5op-instruction < ptx-typed-instruction
+ { a string }
+ { b string }
+ { c string }
+ { d string } ;
+
+TUPLE: ptx-addsub-instruction < ptx-3op-instruction
+ { sat? boolean }
+ { cc? boolean } ;
+
+VARIANT: ptx-mul-mode
+ .wide ;
+UNION: ?ptx-mul-mode POSTPONE: f ptx-mul-mode ;
+
+TUPLE: ptx-mul-instruction < ptx-3op-instruction
+ { mode ?ptx-mul-mode } ;
+
+TUPLE: ptx-mad-instruction < ptx-4op-instruction
+ { mode ?ptx-mul-mode }
+ { sat? boolean } ;
+
+VARIANT: ptx-prmt-mode
+ .f4e .b4e .rc8 .ecl .ecr .rc16 ;
+UNION: ?ptx-prmt-mode POSTPONE: f ptx-prmt-mode ;
+
+ROLE: ptx-float-ftz
+ { ftz? boolean } ;
+ROLE: ptx-float-env < ptx-float-ftz
+ { round ?ptx-float-rounding-mode } ;
+
+VARIANT: ptx-testp-op
+ .finite .infinite .number .notanumber .normal .subnormal ;
+
+VARIANT: ptx-cmp-op
+ .eq .ne
+ .lt .le .gt .ge
+ .ls .hs
+ .equ .neu
+ .ltu .leu .gtu .geu
+ .num .nan ;
+
+VARIANT: ptx-op
+ .and .or .xor .cas .exch .add .inc .dec .min .max
+ .popc ;
+UNION: ?ptx-op POSTPONE: f ptx-op ;
+
+SINGLETONS: .lo .hi ;
+INSTANCE: .lo ptx-mul-mode
+INSTANCE: .lo ptx-cmp-op
+INSTANCE: .hi ptx-mul-mode
+INSTANCE: .hi ptx-cmp-op
+
+TUPLE: ptx-set-instruction < ptx-3op-instruction
+ { cmp-op ptx-cmp-op }
+ { bool-op ?ptx-op }
+ { c ?string }
+ { ftz? boolean } ;
+
+VARIANT: ptx-cache-op
+ .ca .cg .cs .lu .cv
+ .wb .wt ;
+UNION: ?ptx-cache-op POSTPONE: f ptx-cache-op ;
+
+TUPLE: ptx-ldst-instruction < ptx-2op-instruction
+ { volatile? boolean }
+ { storage-space ?ptx-storage-space }
+ { cache-op ?ptx-cache-op } ;
+
+VARIANT: ptx-cache-level
+ .L1 .L2 ;
+
+TUPLE: ptx-branch-instruction < ptx-instruction
+ { target string }
+ { uni? boolean } ;
+
+VARIANT: ptx-membar-level
+ .cta .gl .sys ;
+
+VARIANT: ptx-vote-mode
+ .all .any .uni .ballot ;
+
+TUPLE: ptx-instruction-not-supported-yet < ptx-instruction ;
+
+TUPLE: abs <{ ptx-2op-instruction ptx-float-ftz } ;
+TUPLE: add <{ ptx-addsub-instruction ptx-float-env } ;
+TUPLE: addc < ptx-addsub-instruction ;
+TUPLE: and < ptx-3op-instruction ;
+TUPLE: atom < ptx-3op-instruction
+ { storage-space ?ptx-storage-space }
+ { op ptx-op }
+ { c ?string } ;
+TUPLE: bar.arrive < ptx-instruction
+ { a string }
+ { b string } ;
+TUPLE: bar.red < ptx-2op-instruction
+ { op ptx-op }
+ { b ?string }
+ { c string } ;
+TUPLE: bar.sync < ptx-instruction
+ { a string }
+ { b ?string } ;
+TUPLE: bfe < ptx-4op-instruction ;
+TUPLE: bfi < ptx-5op-instruction ;
+TUPLE: bfind < ptx-2op-instruction
+ { shiftamt? boolean } ;
+TUPLE: bra < ptx-branch-instruction ;
+TUPLE: brev < ptx-2op-instruction ;
+TUPLE: brkpt < ptx-instruction ;
+TUPLE: call < ptx-branch-instruction
+ { return ?string }
+ params ;
+TUPLE: clz < ptx-2op-instruction ;
+TUPLE: cnot < ptx-2op-instruction ;
+TUPLE: copysign < ptx-3op-instruction ;
+TUPLE: cos <{ ptx-2op-instruction ptx-float-env } ;
+TUPLE: cvt < ptx-2op-instruction
+ { rounding-mode ?ptx-rounding-mode }
+ { ftz? boolean }
+ { sat? boolean }
+ { dest-type ptx-type } ;
+TUPLE: cvta < ptx-2op-instruction
+ { to? boolean }
+ { storage-space ?ptx-storage-space } ;
+TUPLE: div <{ ptx-3op-instruction ptx-float-env } ;
+TUPLE: ex2 <{ ptx-2op-instruction ptx-float-env } ;
+TUPLE: exit < ptx-instruction ;
+TUPLE: fma <{ ptx-mad-instruction ptx-float-env } ;
+TUPLE: isspacep < ptx-instruction
+ { storage-space ?ptx-storage-space }
+ { dest string }
+ { a string } ;
+TUPLE: ld < ptx-ldst-instruction ;
+TUPLE: ldu < ptx-ldst-instruction ;
+TUPLE: lg2 <{ ptx-2op-instruction ptx-float-env } ;
+TUPLE: mad <{ ptx-mad-instruction ptx-float-env } ;
+TUPLE: mad24 < ptx-mad-instruction ;
+TUPLE: max <{ ptx-3op-instruction ptx-float-ftz } ;
+TUPLE: membar < ptx-instruction
+ { level ptx-membar-level } ;
+TUPLE: min <{ ptx-3op-instruction ptx-float-ftz } ;
+TUPLE: mov < ptx-2op-instruction ;
+TUPLE: mul <{ ptx-mul-instruction ptx-float-env } ;
+TUPLE: mul24 < ptx-mul-instruction ;
+TUPLE: neg <{ ptx-2op-instruction ptx-float-ftz } ;
+TUPLE: not < ptx-2op-instruction ;
+TUPLE: or < ptx-3op-instruction ;
+TUPLE: pmevent < ptx-instruction
+ { a string } ;
+TUPLE: popc < ptx-2op-instruction ;
+TUPLE: prefetch < ptx-instruction
+ { a string }
+ { storage-space ?ptx-storage-space }
+ { level ptx-cache-level } ;
+TUPLE: prefetchu < ptx-instruction
+ { a string }
+ { level ptx-cache-level } ;
+TUPLE: prmt < ptx-4op-instruction
+ { mode ?ptx-prmt-mode } ;
+TUPLE: rcp <{ ptx-2op-instruction ptx-float-env } ;
+TUPLE: red < ptx-2op-instruction
+ { storage-space ?ptx-storage-space }
+ { op ptx-op } ;
+TUPLE: rem < ptx-3op-instruction ;
+TUPLE: ret < ptx-instruction ;
+TUPLE: rsqrt <{ ptx-2op-instruction ptx-float-env } ;
+TUPLE: sad < ptx-4op-instruction ;
+TUPLE: selp < ptx-4op-instruction ;
+TUPLE: set < ptx-set-instruction
+ { dest-type ptx-type } ;
+TUPLE: setp < ptx-set-instruction
+ { |dest ?string } ;
+TUPLE: shl < ptx-3op-instruction ;
+TUPLE: shr < ptx-3op-instruction ;
+TUPLE: sin <{ ptx-2op-instruction ptx-float-env } ;
+TUPLE: slct < ptx-4op-instruction
+ { dest-type ptx-type }
+ { ftz? boolean } ;
+TUPLE: sqrt <{ ptx-2op-instruction ptx-float-env } ;
+TUPLE: st < ptx-ldst-instruction ;
+TUPLE: sub <{ ptx-addsub-instruction ptx-float-env } ;
+TUPLE: subc < ptx-addsub-instruction ;
+TUPLE: suld < ptx-instruction-not-supported-yet ;
+TUPLE: sured < ptx-instruction-not-supported-yet ;
+TUPLE: sust < ptx-instruction-not-supported-yet ;
+TUPLE: suq < ptx-instruction-not-supported-yet ;
+TUPLE: testp < ptx-2op-instruction
+ { op ptx-testp-op } ;
+TUPLE: tex < ptx-instruction-not-supported-yet ;
+TUPLE: txq < ptx-instruction-not-supported-yet ;
+TUPLE: trap < ptx-instruction ;
+TUPLE: vabsdiff < ptx-instruction-not-supported-yet ;
+TUPLE: vadd < ptx-instruction-not-supported-yet ;
+TUPLE: vmad < ptx-instruction-not-supported-yet ;
+TUPLE: vmax < ptx-instruction-not-supported-yet ;
+TUPLE: vmin < ptx-instruction-not-supported-yet ;
+TUPLE: vset < ptx-instruction-not-supported-yet ;
+TUPLE: vshl < ptx-instruction-not-supported-yet ;
+TUPLE: vshr < ptx-instruction-not-supported-yet ;
+TUPLE: vsub < ptx-instruction-not-supported-yet ;
+TUPLE: vote < ptx-2op-instruction
+ { mode ptx-vote-mode } ;
+TUPLE: xor < ptx-3op-instruction ;
+
+GENERIC: ptx-element-label ( elt -- label )
+M: object ptx-element-label drop f ;
+
+GENERIC: (write-ptx-element) ( elt -- )
+
+: write-ptx-element ( elt -- )
+ dup ptx-element-label [ write ":" write ] when*
+ "\t" write (write-ptx-element)
+ ";" print ;
+
+: write-ptx ( ptx -- )
+ "\t.version " write dup version>> write ";" print
+ dup target>> write-ptx-element
+ body>> [ write-ptx-element ] each ;
+
+: write-ptx-symbol ( symbol/f -- )
+ [ name>> write ] when* ;
+
+M: f (write-ptx-element)
+ drop ;
+
+M: word (write-ptx-element)
+ name>> write ;
+
+M: .const (write-ptx-element)
+ ".const" write
+ bank>> [ "[" write number>string write "]" write ] when* ;
+M: .v2 (write-ptx-element)
+ ".v2" write of>> (write-ptx-element) ;
+M: .v4 (write-ptx-element)
+ ".v4" write of>> (write-ptx-element) ;
+M: .struct (write-ptx-element)
+ ".struct " write name>> write ;
+
+M: ptx-target (write-ptx-element)
+ ".target " write
+ [ arch>> [ name>> ] [ f ] if* ]
+ [ map_f64_to_f32?>> [ "map_f64_to_f32" ] [ f ] if ]
+ [ texmode>> [ name>> ] [ f ] if* ] tri
+ 3array sift ", " join write ;
+
+: write-ptx-dim ( dim -- )
+ {
+ { [ dup zero? ] [ drop "[]" write ] }
+ { [ dup sequence? ] [ [ "[" write number>string write "]" write ] each ] }
+ [ "[" write number>string write "]" write ]
+ } cond ;
+
+M: ptx-variable (write-ptx-element)
+ dup extern?>> [ ".extern " write ] when
+ dup visible?>> [ ".visible " write ] when
+ dup align>> [ ".align " write number>string write " " write ] when*
+ dup storage-space>> (write-ptx-element) " " write
+ dup type>> (write-ptx-element) " " write
+ dup name>> write
+ dup parameter>> [ "<" write number>string write ">" write ] when*
+ dup dim>> [ write-ptx-dim ] when*
+ dup initializer>> [ " = " write write ] when*
+ drop ;
+
+: write-params ( params -- )
+ "(" write unclip (write-ptx-element)
+ [ ", " write (write-ptx-element) ] each
+ ")" write ;
+
+: write-body ( params -- )
+ "\t{" print
+ [ write-ptx-element ] each
+ "\t}" write ;
+
+: write-entry ( entry -- )
+ dup name>> write " " write
+ dup params>> [ write-params ] when* nl
+ dup directives>> [ (write-ptx-element) ] each nl
+ dup body>> write-body
+ drop ;
+
+M: ptx-entry (write-ptx-element)
+ ".entry " write
+ write-entry ;
+
+M: ptx-func (write-ptx-element)
+ ".func " write
+ dup return>> [ "(" write (write-ptx-element) ") " write ] when*
+ write-entry ;
+
+M: .file (write-ptx-element)
+ ".file " write info>> write ;
+M: .loc (write-ptx-element)
+ ".loc " write info>> write ;
+M: .maxnctapersm (write-ptx-element)
+ ".maxnctapersm " write ncta>> number>string write ;
+M: .minnctapersm (write-ptx-element)
+ ".minnctapersm " write ncta>> number>string write ;
+M: .maxnreg (write-ptx-element)
+ ".maxnreg " write n>> number>string write ;
+M: .maxntid (write-ptx-element)
+ ".maxntid " write
+ dup sequence? [ [ number>string ] map ", " join write ] [ number>string write ] if ;
+M: .pragma (write-ptx-element)
+ ".pragma \"" write pragma>> write "\"" write ;
+
+M: ptx-instruction ptx-element-label
+ label>> ;
+
+: write-insn ( insn name -- insn )
+ over predicate>>
+ [ "@" write dup negated?>> [ "!" write ] when variable>> write " " write ] when*
+ write ;
+
+: write-2op ( insn -- )
+ dup type>> (write-ptx-element) " " write
+ dup dest>> write ", " write
+ dup a>> write
+ drop ;
+
+: write-3op ( insn -- )
+ dup write-2op ", " write
+ dup b>> write
+ drop ;
+
+: write-4op ( insn -- )
+ dup write-3op ", " write
+ dup c>> write
+ drop ;
+
+: write-5op ( insn -- )
+ dup write-4op ", " write
+ dup d>> write
+ drop ;
+
+: write-ftz ( insn -- )
+ ftz?>> [ ".ftz" write ] when ;
+
+: write-sat ( insn -- )
+ sat?>> [ ".sat" write ] when ;
+
+: write-float-env ( insn -- )
+ dup round>> (write-ptx-element)
+ write-ftz ;
+
+: write-int-addsub ( insn -- )
+ dup write-sat
+ dup cc?>> [ ".cc" write ] when
+ write-3op ;
+
+: write-addsub ( insn -- )
+ dup write-float-env
+ write-int-addsub ;
+
+: write-ldst ( insn -- )
+ dup volatile?>> [ ".volatile" write ] when
+ dup storage-space>> (write-ptx-element)
+ dup cache-op>> (write-ptx-element)
+ write-2op ;
+
+: (write-mul) ( insn -- )
+ dup mode>> (write-ptx-element)
+ drop ;
+
+: write-mul ( insn -- )
+ dup write-float-env
+ dup (write-mul)
+ write-3op ;
+
+: write-mad ( insn -- )
+ dup write-float-env
+ dup (write-mul)
+ dup write-sat
+ write-4op ;
+
+: write-uni ( insn -- )
+ uni?>> [ ".uni" write ] when ;
+
+: write-set ( insn -- )
+ dup cmp-op>> (write-ptx-element)
+ dup bool-op>> (write-ptx-element)
+ write-ftz ;
+
+M: abs (write-ptx-element)
+ "abs" write-insn
+ dup write-ftz
+ write-2op ;
+M: add (write-ptx-element)
+ "add" write-insn
+ write-addsub ;
+M: addc (write-ptx-element)
+ "addc" write-insn
+ write-int-addsub ;
+M: and (write-ptx-element)
+ "and" write-insn
+ write-3op ;
+M: atom (write-ptx-element)
+ "atom" write-insn
+ dup storage-space>> (write-ptx-element)
+ dup op>> (write-ptx-element)
+ dup write-3op
+ c>> [ ", " write write ] when* ;
+M: bar.arrive (write-ptx-element)
+ "bar.arrive " write-insn
+ dup a>> write ", " write
+ dup b>> write
+ drop ;
+M: bar.red (write-ptx-element)
+ "bar.red" write-insn
+ dup op>> (write-ptx-element)
+ dup write-2op
+ dup b>> [ ", " write write ] when*
+ ", " write c>> write ;
+M: bar.sync (write-ptx-element)
+ "bar.arrive " write-insn
+ dup a>> write
+ dup b>> [ ", " write write ] when*
+ drop ;
+M: bfe (write-ptx-element)
+ "bfe" write-insn
+ write-4op ;
+M: bfi (write-ptx-element)
+ "bfi" write-insn
+ write-5op ;
+M: bfind (write-ptx-element)
+ "bfind" write-insn
+ dup shiftamt?>> [ ".shiftamt" write ] when
+ write-2op ;
+M: bra (write-ptx-element)
+ "bra" write-insn
+ dup write-uni
+ " " write target>> write ;
+M: brev (write-ptx-element)
+ "brev" write-insn
+ write-2op ;
+M: brkpt (write-ptx-element)
+ "brkpt" write-insn drop ;
+M: call (write-ptx-element)
+ "call" write-insn " " write
+ dup return>> [ "(" write write "), " write ] when*
+ dup target>> write
+ dup params>> [ ", (" write ", " join write ")" write ] unless-empty
+ drop ;
+M: clz (write-ptx-element)
+ "clz" write-insn
+ write-2op ;
+M: cnot (write-ptx-element)
+ "cnot" write-insn
+ write-2op ;
+M: copysign (write-ptx-element)
+ "copysign" write-insn
+ write-3op ;
+M: cos (write-ptx-element)
+ "cos" write-insn
+ dup write-float-env
+ write-2op ;
+M: cvt (write-ptx-element)
+ "cvt" write-insn
+ dup rounding-mode>> (write-ptx-element)
+ dup write-ftz
+ dup write-sat
+ dup dest-type>> (write-ptx-element)
+ write-2op ;
+M: cvta (write-ptx-element)
+ "cvta" write-insn
+ dup to?>> [ ".to" write ] when
+ dup storage-space>> (write-ptx-element)
+ write-2op ;
+M: div (write-ptx-element)
+ "div" write-insn
+ dup write-float-env
+ write-3op ;
+M: ex2 (write-ptx-element)
+ "ex2" write-insn
+ dup write-float-env
+ write-2op ;
+M: exit (write-ptx-element)
+ "exit" write-insn drop ;
+M: fma (write-ptx-element)
+ "fma" write-insn
+ write-mad ;
+M: isspacep (write-ptx-element)
+ "isspacep" write-insn
+ dup storage-space>> (write-ptx-element)
+ " " write
+ dup dest>> write ", " write a>> write ;
+M: ld (write-ptx-element)
+ "ld" write-insn
+ write-ldst ;
+M: ldu (write-ptx-element)
+ "ldu" write-insn
+ write-ldst ;
+M: lg2 (write-ptx-element)
+ "lg2" write-insn
+ dup write-float-env
+ write-2op ;
+M: mad (write-ptx-element)
+ "mad" write-insn
+ write-mad ;
+M: mad24 (write-ptx-element)
+ "mad24" write-insn
+ dup (write-mul)
+ dup write-sat
+ write-4op ;
+M: max (write-ptx-element)
+ "max" write-insn
+ dup write-ftz
+ write-3op ;
+M: membar (write-ptx-element)
+ "membar" write-insn
+ dup level>> (write-ptx-element)
+ drop ;
+M: min (write-ptx-element)
+ "min" write-insn
+ dup write-ftz
+ write-3op ;
+M: mov (write-ptx-element)
+ "mov" write-insn
+ write-2op ;
+M: mul (write-ptx-element)
+ "mul" write-insn
+ write-mul ;
+M: mul24 (write-ptx-element)
+ "mul24" write-insn
+ dup (write-mul)
+ write-3op ;
+M: neg (write-ptx-element)
+ "neg" write-insn
+ dup write-ftz
+ write-2op ;
+M: not (write-ptx-element)
+ "not" write-insn
+ write-2op ;
+M: or (write-ptx-element)
+ "or" write-insn
+ write-3op ;
+M: pmevent (write-ptx-element)
+ "pmevent" write-insn " " write a>> write ;
+M: popc (write-ptx-element)
+ "popc" write-insn
+ write-2op ;
+M: prefetch (write-ptx-element)
+ "prefetch" write-insn
+ dup storage-space>> (write-ptx-element)
+ dup level>> (write-ptx-element)
+ " " write a>> write ;
+M: prefetchu (write-ptx-element)
+ "prefetchu" write-insn
+ dup level>> (write-ptx-element)
+ " " write a>> write ;
+M: prmt (write-ptx-element)
+ "prmt" write-insn
+ dup mode>> (write-ptx-element)
+ write-4op ;
+M: rcp (write-ptx-element)
+ "rcp" write-insn
+ dup write-float-env
+ write-3op ;
+M: red (write-ptx-element)
+ "red" write-insn
+ dup storage-space>> (write-ptx-element)
+ dup op>> (write-ptx-element)
+ write-2op ;
+M: rem (write-ptx-element)
+ "rem" write-insn
+ write-3op ;
+M: ret (write-ptx-element)
+ "ret" write-insn drop ;
+M: rsqrt (write-ptx-element)
+ "rsqrt" write-insn
+ dup write-float-env
+ write-2op ;
+M: sad (write-ptx-element)
+ "sad" write-insn
+ write-4op ;
+M: selp (write-ptx-element)
+ "selp" write-insn
+ write-4op ;
+M: set (write-ptx-element)
+ "set" write-insn
+ dup write-set
+ dup dest-type>> (write-ptx-element)
+ dup write-3op
+ c>> [ ", " write write ] when* ;
+M: setp (write-ptx-element)
+ "setp" write-insn
+ dup write-set
+ dup type>> (write-ptx-element) " " write
+ dup dest>> write
+ dup |dest>> [ "|" write write ] when* ", " write
+ dup a>> write ", " write
+ dup b>> write
+ c>> [ ", " write write ] when* ;
+M: shl (write-ptx-element)
+ "shl" write-insn
+ write-3op ;
+M: shr (write-ptx-element)
+ "shr" write-insn
+ write-3op ;
+M: sin (write-ptx-element)
+ "sin" write-insn
+ dup write-float-env
+ write-2op ;
+M: slct (write-ptx-element)
+ "slct" write-insn
+ dup write-ftz
+ dup dest-type>> (write-ptx-element)
+ write-4op ;
+M: sqrt (write-ptx-element)
+ "sqrt" write-insn
+ dup write-float-env
+ write-2op ;
+M: st (write-ptx-element)
+ "st" write-insn
+ write-ldst ;
+M: sub (write-ptx-element)
+ "sub" write-insn
+ write-addsub ;
+M: subc (write-ptx-element)
+ "subc" write-insn
+ write-int-addsub ;
+M: testp (write-ptx-element)
+ "testp" write-insn
+ dup op>> (write-ptx-element)
+ write-2op ;
+M: vote (write-ptx-element)
+ "vote" write-insn
+ dup mode>> (write-ptx-element)
+ write-2op ;
+M: xor (write-ptx-element)
+ "or" write-insn
+ write-3op ;
-! Copyright (C) 2008 Eduardo Cavazos, Slava Pestov.
+! Copyright (C) 2008, 2010 Eduardo Cavazos, Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: system io.files io.pathnames namespaces kernel accessors
assocs ;
! Keep test-log around?
SYMBOL: builder-debug
-! Host to send status notifications to.
-SYMBOL: status-host
+! URL for status notifications.
+SYMBOL: status-url
-! Username to log in.
-SYMBOL: status-username
+! Password for status notifications.
+SYMBOL: status-secret
SYMBOL: upload-help?
! Copyright (C) 2009, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: arrays accessors io io.sockets io.encodings.utf8 io.files
-io.launcher kernel make mason.config mason.common mason.email
-mason.twitter namespaces sequences prettyprint fry ;
+USING: accessors fry http.client io io.encodings.utf8 io.files
+kernel mason.common mason.config mason.email mason.twitter
+namespaces prettyprint sequences ;
IN: mason.notify
-: status-notify ( input-file args -- )
- status-host get [
- [
- "ssh" , status-host get , "-l" , status-username get ,
- "./mason-notify" ,
- short-host-name ,
- target-cpu get ,
- target-os get ,
- ] { } make prepend
- [ 5 ] 2dip '[
- <process>
- _ >>stdin
- _ >>command
- short-running-process
- ] retry
- ] [ 2drop ] if ;
+: status-notify ( report arg message -- )
+ [
+ short-host-name "host-name" set
+ target-cpu get "target-cpu" set
+ target-os get "target-os" set
+ status-secret get "secret" set
+ "message" set
+ "arg" set
+ "report" set
+ ] H{ } make-assoc
+ [ 5 ] dip '[ _ status-url get http-post 2drop ] retry ;
: notify-heartbeat ( -- )
- f { "heartbeat" } status-notify ;
+ f f "heartbeat" status-notify ;
: notify-begin-build ( git-id -- )
[ "Starting build of GIT ID " write print flush ]
- [ f swap "git-id" swap 2array status-notify ]
+ [ f swap "git-id" status-notify ]
bi ;
: notify-make-vm ( -- )
"Compiling VM" print flush
- f { "make-vm" } status-notify ;
+ f f "make-vm" status-notify ;
: notify-boot ( -- )
"Bootstrapping" print flush
- f { "boot" } status-notify ;
+ f f "boot" status-notify ;
: notify-test ( -- )
"Running tests" print flush
- f { "test" } status-notify ;
+ f f "test" status-notify ;
: notify-report ( status -- )
[ "Build finished with status: " write . flush ]
[
- [ "report" ] dip
- [ [ utf8 file-contents ] dip email-report ]
- [ "report" swap name>> 2array status-notify ]
- 2bi
+ [ "report" utf8 file-contents ] dip
+ [ name>> "report" status-notify ] [ email-report ] 2bi
] bi ;
: notify-release ( archive-name -- )
[ "Uploaded " prepend [ print flush ] [ mason-tweet ] bi ]
- [ f swap "release" swap 2array status-notify ]
+ [ f swap "release" status-notify ]
bi ;
+++ /dev/null
-Slava Pestov
\ No newline at end of file
+++ /dev/null
-! Copyright (C) 2009, 2010 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors calendar combinators combinators.smart
-command-line db.tuples io io.encodings.utf8 io.files kernel
-mason.server namespaces present sequences ;
-IN: mason.server.notify
-
-SYMBOLS: host-name target-os target-cpu message message-arg ;
-
-: parse-args ( command-line -- )
- dup last message-arg set
- [
- {
- [ host-name set ]
- [ target-cpu set ]
- [ target-os set ]
- [ message set ]
- } spread
- ] input<sequence ;
-
-: find-builder ( -- builder )
- builder new
- host-name get >>host-name
- target-os get >>os
- target-cpu get >>cpu
- dup select-tuple [ ] [ dup insert-tuple ] ?if ;
-
-: heartbeat ( builder -- ) now >>heartbeat-timestamp drop ;
-
-: git-id ( builder id -- ) >>current-git-id +starting+ >>status drop ;
-
-: make-vm ( builder -- ) +make-vm+ >>status drop ;
-
-: boot ( builder -- ) +boot+ >>status drop ;
-
-: test ( builder -- ) +test+ >>status drop ;
-
-: report ( builder status content -- )
- [ >>status ] [ >>last-report ] bi*
- dup status>> +clean+ = [
- dup current-git-id>> >>clean-git-id
- dup current-timestamp>> >>clean-timestamp
- ] when
- dup current-git-id>> >>last-git-id
- dup current-timestamp>> >>last-timestamp
- drop ;
-
-: release ( builder name -- )
- >>last-release
- dup clean-git-id>> >>release-git-id
- drop ;
-
-: update-builder ( builder -- )
- message get {
- { "heartbeat" [ heartbeat ] }
- { "git-id" [ message-arg get git-id ] }
- { "make-vm" [ make-vm ] }
- { "boot" [ boot ] }
- { "test" [ test ] }
- { "report" [ message-arg get contents report ] }
- { "release" [ message-arg get release ] }
- } case ;
-
-: handle-update ( command-line timestamp -- )
- [
- [ parse-args find-builder ] dip >>current-timestamp
- [ update-builder ] [ update-tuple ] bi
- ] with-mason-db ;
-
-CONSTANT: log-file "resource:mason.log"
-
-: log-update ( command-line timestamp -- )
- log-file utf8 [
- present write ": " write " " join print
- ] with-file-appender ;
-
-: main ( -- )
- command-line get now [ log-update ] [ handle-update ] 2bi ;
-
-MAIN: main
last-release release-git-id
last-git-id last-timestamp last-report
current-git-id current-timestamp
-status
-heartbeat-timestamp ;
+status ;
builder "BUILDERS" {
{ "host-name" "HOST_NAME" TEXT +user-assigned-id+ }
! Can't name it CURRENT_TIMESTAMP because of bug in db library
{ "current-timestamp" "CURR_TIMESTAMP" TIMESTAMP }
{ "status" "STATUS" TEXT }
-
- { "heartbeat-timestamp" "HEARTBEAT_TIMESTAMP" TIMESTAMP }
} define-persistent
: mason-db ( -- db ) "resource:mason.db" <sqlite-db> ;
: remote-directory ( string -- string' )
[ upload-directory get ] dip "/" glue ;
-: remote ( string version -- string )
- remote-directory swap "/" glue ;
-
: platform ( builder -- string )
[ os>> ] [ cpu>> ] bi (platform) ;
] [ drop ] 2bi release-directory ;
: remote-binary-release-name ( version builder -- string )
- [ binary-release-name ] [ drop ] 2bi remote ;
+ binary-release-name remote-directory ;
: source-release-name ( version -- string )
[ "factor-src-" ".zip" surround ] keep release-directory ;
: remote-source-release-name ( version -- string )
- [ source-release-name ] keep remote ;
+ source-release-name remote-directory ;
: make-release-directory ( version -- )
"Creating release directory..." print flush
- [ "mkdir -p " % "" release-directory % "\n" % ] "" make
+ [ "mkdir -p " % "" release-directory remote-directory % "\n" % ] "" make
execute-on-server ;
: tweet-release ( version announcement-url -- )
<table border="1">
<tr><td>Host name:</td><td><t:xml t:name="host-name" /></td></tr>
- <tr><td>Last heartbeat:</td><td><t:xml t:name="last-heartbeat" /></td></tr>
+ <tr><td>Last heartbeat:</td><td><t:label t:name="current-timestamp" /></td></tr>
<tr><td>Current status:</td><td><t:xml t:name="status" /></td></tr>
<tr><td>Last build:</td><td><t:xml t:name="last-build" /></td></tr>
<tr><td>Last clean build:</td><td><t:xml t:name="last-clean-build" /></td></tr>
: <make-release-action> ( -- action )
<page-action>
- [ { { "version" [ v-one-line ] } } validate-params ] >>validate
+ [
+ {
+ { "version" [ v-one-line ] }
+ { "announcement-url" [ v-url ] }
+ } validate-params
+ ] >>validate
[
[
"version" value "announcement-url" value do-release
http.server.dispatchers mason.server webapps.mason.grids
webapps.mason.make-release webapps.mason.package
webapps.mason.release webapps.mason.report
-webapps.mason.downloads ;
+webapps.mason.downloads webapps.mason.status-update ;
IN: webapps.mason
TUPLE: mason-app < dispatcher ;
<protected>
"make releases" >>description
{ can-make-releases? } >>capabilities
+ "make-release" add-responder
- "make-release" add-responder ;
+ <status-update-action>
+ "status-update" add-responder ;
[ current-status "status" set-value ]
[ last-build-status "last-build" set-value ]
[ clean-build-status "last-clean-build" set-value ]
- [ heartbeat-timestamp>> "heartbeat-timestamp" set-value ]
+ [ current-timestamp>> "current-timestamp" set-value ]
[ packages-link "binaries" set-value ]
[ clean-image-link "clean-images" set-value ]
[ report-link "last-report" set-value ]
--- /dev/null
+Slava Pestov
--- /dev/null
+! Copyright (C) 2010 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors calendar combinators db.tuples furnace.actions
+furnace.redirection html.forms http.server.responses io kernel
+mason.config mason.server namespaces validators ;
+IN: webapps.mason.status-update
+
+: find-builder ( -- builder )
+ builder new
+ "host-name" value >>host-name
+ "target-os" value >>os
+ "target-cpu" value >>cpu
+ dup select-tuple [ ] [ dup insert-tuple ] ?if ;
+
+: git-id ( builder id -- ) >>current-git-id +starting+ >>status drop ;
+
+: make-vm ( builder -- ) +make-vm+ >>status drop ;
+
+: boot ( builder -- ) +boot+ >>status drop ;
+
+: test ( builder -- ) +test+ >>status drop ;
+
+: report ( builder status content -- )
+ [ >>status ] [ >>last-report ] bi*
+ dup status>> +clean+ = [
+ dup current-git-id>> >>clean-git-id
+ dup current-timestamp>> >>clean-timestamp
+ ] when
+ dup current-git-id>> >>last-git-id
+ dup current-timestamp>> >>last-timestamp
+ drop ;
+
+: release ( builder name -- )
+ >>last-release
+ dup clean-git-id>> >>release-git-id
+ drop ;
+
+: update-builder ( builder -- )
+ "message" value {
+ { "heartbeat" [ drop ] }
+ { "git-id" [ "arg" value git-id ] }
+ { "make-vm" [ make-vm ] }
+ { "boot" [ boot ] }
+ { "test" [ test ] }
+ { "report" [ "arg" value "report" value report ] }
+ { "release" [ "arg" value release ] }
+ } case ;
+
+: <status-update-action> ( -- action )
+ <action>
+ [
+ {
+ { "host-name" [ v-one-line ] }
+ { "target-cpu" [ v-one-line ] }
+ { "target-os" [ v-one-line ] }
+ { "message" [ v-one-line ] }
+ { "arg" [ [ v-one-line ] v-optional ] }
+ { "report" [ ] }
+ { "secret" [ v-one-line ] }
+ } validate-params
+
+ "secret" value status-secret get = [ validation-failed ] unless
+ ] >>validate
+
+ [
+ [
+ [
+ find-builder
+ now >>current-timestamp
+ [ update-builder ] [ update-tuple ] bi
+ ] with-mason-db
+ "OK" "text/html" <content>
+ ] if-secure
+ ] >>submit ;
--- /dev/null
+Dmitry Shubin
--- /dev/null
+Z algorithm for pattern preprocessing
--- /dev/null
+algorithms
--- /dev/null
+! Copyright (C) 2010 Dmitry Shubin.
+! See http://factorcode.org/license.txt for BSD license.
+USING: arrays help.markup help.syntax sequences ;
+IN: z-algorithm
+
+HELP: lcp
+{ $values
+ { "seq1" sequence } { "seq2" sequence }
+ { "n" "a non-negative integer" }
+}
+{ $description
+ "Outputs the length of longest common prefix of two sequences."
+} ;
+
+HELP: z-values
+{ $values
+ { "seq" sequence } { "Z" array }
+}
+{ $description
+ "Outputs an array of the same length as " { $snippet "seq" }
+ ", containing Z-values for given sequence. See "
+ { $link "z-algorithm" } " for details."
+} ;
+
+ARTICLE: "z-algorithm" "Z algorithm"
+{ $heading "Definition" }
+"Given the sequence " { $snippet "S" } " and the index "
+{ $snippet "i" } ", let " { $snippet "i" } "-th Z value of "
+{ $snippet "S" } " be the length of the longest subsequence of "
+{ $snippet "S" } " that starts at " { $snippet "i" }
+" and matches the prefix of " { $snippet "S" } "."
+
+{ $heading "Example" }
+"Here is an example for string " { $snippet "\"abababaca\"" } ":"
+{ $table
+ { { $snippet "i:" } "0" "1" "2" "3" "4" "5" "6" "7" "8" }
+ { { $snippet "S:" } "a" "b" "a" "b" "a" "b" "a" "c" "a" }
+ { { $snippet "Z:" } "9" "0" "5" "0" "3" "0" "1" "0" "1" }
+}
+
+{ $heading "Summary" }
+"The " { $vocab-link "z-algorithm" }
+" vocabulary implements algorithm for finding all Z values for sequence "
+{ $snippet "S" }
+" in linear time. In contrast to naive approach which takes "
+{ $snippet "Θ(n^2)" } " time."
+;
+
+ABOUT: "z-algorithm"
--- /dev/null
+! Copyright (C) 2010 Dmitry Shubin.
+! See http://factorcode.org/license.txt for BSD license.
+USING: tools.test z-algorithm ;
+IN: z-algorithm.tests
+
+[ 0 ] [ "qwerty" "" lcp ] unit-test
+[ 0 ] [ "qwerty" "asdf" lcp ] unit-test
+[ 3 ] [ "qwerty" "qwe" lcp ] unit-test
+[ 3 ] [ "qwerty" "qwet" lcp ] unit-test
+
+[ { } ] [ "" z-values ] unit-test
+[ { 1 } ] [ "q" z-values ] unit-test
+[ { 9 0 5 0 3 0 1 0 1 } ] [ "abababaca" z-values ] unit-test
--- /dev/null
+! Copyright (C) 2010 Dmitry Shubin.
+! See http://factorcode.org/license.txt for BSD license.
+USING: arrays combinators.smart kernel locals math math.ranges
+sequences sequences.private ;
+IN: z-algorithm
+
+: lcp ( seq1 seq2 -- n )
+ [ min-length ] 2keep mismatch [ nip ] when* ;
+
+<PRIVATE
+
+:: out-of-zbox ( seq Z l r k -- seq Z l r )
+ seq k tail-slice seq lcp :> Zk
+ Zk Z push seq Z
+ Zk 0 > [ k Zk k + 1 - ] [ l r ] if ; inline
+
+:: inside-zbox ( seq Z l r k -- seq Z l r )
+ k l - Z nth :> Zk'
+ r k - 1 + :> b
+ seq Z Zk' b <
+ [ Zk' Z push l r ] ! still inside
+ [
+ seq r 1 + seq b [ tail-slice ] 2bi@ lcp :> q
+ q b + Z push k q r +
+ ] if ; inline
+
+: (z-value) ( seq Z l r k -- seq Z l r )
+ 2dup < [ out-of-zbox ] [ inside-zbox ] if ; inline
+
+:: (z-values) ( seq -- Z )
+ V{ } clone 0 0 seq length :> ( Z l r len )
+ len Z push [ seq Z l r 1 len [a,b) [ (z-value) ] each ]
+ drop-outputs Z ; inline
+
+PRIVATE>
+
+: z-values ( seq -- Z )
+ dup length 0 > [ (z-values) ] when >array ;
(setq fuel-stack-mode-string "/S")
(when fuel-mode-stack-p (fuel-stack-mode fuel-mode))
- (when (and fuel-mode (not (file-exists-p (buffer-file-name))))
- (fuel-scaffold--maybe-insert)))
+ (let ((file-name (buffer-file-name)))
+ (when (and fuel-mode
+ file-name
+ (not (file-exists-p file-name)))
+ (fuel-scaffold--maybe-insert))))
\f
;;; Keys: