use: src/tagged-rep obj/tagged-rep
literal: slot tag ;
-! String element access
-INSN: ##string-nth
-def: dst/int-rep
-use: obj/tagged-rep index/int-rep
-temp: temp/int-rep ;
-
! Register transfers
INSN: ##copy
def: dst
UNION: def-is-use-insn
##box-alien
##box-displaced-alien
-##string-nth
##unbox-any-c-ptr ;
SYMBOL: vreg-insn
{ kernel:eq? [ emit-eq ] }
{ slots.private:slot [ emit-slot ] }
{ slots.private:set-slot [ emit-set-slot ] }
- { strings.private:string-nth [ drop emit-string-nth ] }
+ { strings.private:string-nth-fast [ drop emit-string-nth-fast ] }
{ strings.private:set-string-nth-fast [ drop emit-set-string-nth-fast ] }
{ classes.tuple.private:<tuple-boa> [ emit-<tuple-boa> ] }
{ arrays:<array> [ emit-<array> ] }
compiler.cfg.stacks cpu.architecture ;
IN: compiler.cfg.intrinsics.strings
-: emit-string-nth ( -- )
- 2inputs swap ^^string-nth ds-push ;
+: (string-nth) ( n string -- base offset rep c-type )
+ ^^tagged>integer swap ^^add string-offset int-rep uchar ; inline
+
+: emit-string-nth-fast ( -- )
+ 2inputs (string-nth) ^^load-memory-imm ds-push ;
: emit-set-string-nth-fast ( -- )
- 3inputs ^^tagged>integer ^^add string-offset
- int-rep uchar ##store-memory-imm ;
+ 3inputs (string-nth) ##store-memory-imm ;
CODEGEN: ##slot-imm %slot-imm
CODEGEN: ##set-slot %set-slot
CODEGEN: ##set-slot-imm %set-slot-imm
-CODEGEN: ##string-nth %string-nth
CODEGEN: ##add %add
CODEGEN: ##add-imm %add-imm
CODEGEN: ##sub %sub
} compile-test-bb
] unit-test
-[ CHAR: l ] [
- V{
- T{ ##load-reference f 0 "hello world" }
- T{ ##load-tagged f 1 3 }
- T{ ##string-nth f 0 0 1 2 }
- T{ ##shl-imm f 0 0 4 }
- } compile-test-bb
-] unit-test
-
[ 1 ] [
V{
T{ ##load-tagged f 0 32 }
] "outputs" set-word-prop
] each
-\ string-nth [
- 2drop fixnum 0 23 2^ [a,b] <class/interval-info>
+\ string-nth-fast [
+ 2drop fixnum 0 255 [a,b] <class/interval-info>
] "outputs" set-word-prop
{
[ t ] [ [ { 1 } diff ] { diff } inlined? ] unit-test
[ f ] [ [ { 1 } swap diff ] { diff } inlined? ] unit-test ! We could do this
+
+! Output range for string-nth now that string-nth is a library word and
+! not a primitive
+[ t ] [
+ ! Should actually be 0 23 2^ 1 - [a,b]
+ [ string-nth ] final-info first interval>> 0 23 2^ [a,b] =
+] unit-test
HOOK: %set-slot cpu ( src obj slot scale tag -- )
HOOK: %set-slot-imm cpu ( src obj slot tag -- )
-HOOK: %string-nth cpu ( dst obj index temp -- )
-
HOOK: %add cpu ( dst src1 src2 -- )
HOOK: %add-imm cpu ( dst src1 src2 -- )
HOOK: %sub cpu ( dst src1 src2 -- )
M: ppc %set-slot ( src obj slot -- ) swapd STWX ;
M: ppc %set-slot-imm ( src obj slot tag -- ) slot-offset STW ;
-M:: ppc %string-nth ( dst src index temp -- )
- [
- "end" define-label
- temp src index ADD
- dst temp string-offset LBZ
- 0 dst HEX: 80 CMPI
- "end" get BLT
- temp src string-aux-offset LWZ
- temp temp index ADD
- temp temp index ADD
- temp temp byte-array-offset LHZ
- temp temp 7 SLWI
- dst dst temp XOR
- "end" resolve-label
- ] with-scope ;
-
M: ppc %add ADD ;
M: ppc %add-imm ADDI ;
M: ppc %sub swap SUBF ;
USING: bootstrap.image.private compiler.constants
compiler.units cpu.x86.assembler cpu.x86.assembler.operands
kernel kernel.private layouts locals.backend make math
-math.private namespaces sequences slots.private vocabs ;
+math.private namespaces sequences slots.private strings.private
+vocabs ;
IN: bootstrap.x86
big-endian off
ds-reg [] temp0 MOV
] \ slot define-sub-primitive
+[
+ ! load string index from stack
+ temp0 ds-reg bootstrap-cell neg [+] MOV
+ temp0 tag-bits get SHR
+ ! load string from stack
+ temp1 ds-reg [] MOV
+ ! load character
+ temp0 8-bit-version-of temp0 temp1 string-offset [++] MOV
+ temp0 temp0 8-bit-version-of MOVZX
+ temp0 tag-bits get SHL
+ ! store character to stack
+ ds-reg bootstrap-cell SUB
+ ds-reg [] temp0 MOV
+] \ string-nth-fast define-sub-primitive
+
! Shufflers
[
ds-reg bootstrap-cell SUB
[ quot call ] with-save/restore
] if ; inline
-M:: x86 %string-nth ( dst src index temp -- )
- ! We request a small-reg of size 8 since those of size 16 are
- ! a superset.
- "end" define-label
- dst { src index temp } 8 [| new-dst |
- ! Load the least significant 7 bits into new-dst.
- ! 8th bit indicates whether we have to load from
- ! the aux vector or not.
- new-dst 8-bit-version-of src index string-offset [++] MOV
- new-dst new-dst 8-bit-version-of MOVZX
- ! Do we have to look at the aux vector?
- new-dst HEX: 80 CMP
- "end" get JL
- ! Yes, this is a non-ASCII character. Load aux vector
- temp src string-aux-offset [+] MOV
- new-dst temp XCHG
- ! Load high 16 bits
- new-dst 16-bit-version-of new-dst index byte-array-offset [+*2+] MOV
- new-dst new-dst 16-bit-version-of MOVZX
- new-dst 7 SHL
- ! Compute code point
- new-dst temp XOR
- "end" resolve-label
- dst new-dst int-rep %copy
- ] with-small-register ;
-
:: %alien-integer-getter ( dst exclude address bits quot -- )
dst exclude bits [| new-dst |
new-dst dup bits n-bit-version-of dup address MOV
\ set-slot { object object fixnum } { } define-primitive
\ set-special-object { object fixnum } { } define-primitive
\ set-string-nth-fast { fixnum fixnum string } { } define-primitive
-\ set-string-nth-slow { fixnum fixnum string } { } define-primitive
\ size { object } { fixnum } define-primitive \ size make-flushable
\ slot { object fixnum } { object } define-primitive \ slot make-flushable
\ special-object { fixnum } { object } define-primitive \ special-object make-flushable
-\ string-nth { fixnum string } { fixnum } define-primitive \ string-nth make-flushable
+\ string-nth-fast { fixnum string } { fixnum } define-primitive \ string-nth-fast make-flushable
\ strip-stack-traces { } { } define-primitive
\ system-micros { } { integer } define-primitive \ system-micros make-flushable
\ tag { object } { fixnum } define-primitive \ tag make-foldable
{ "fixnum<=" "math.private" (( x y -- z )) }
{ "fixnum>" "math.private" (( x y -- ? )) }
{ "fixnum>=" "math.private" (( x y -- ? )) }
+ { "string-nth-fast" "strings.private" (( n string -- ch )) }
{ "(set-context)" "threads.private" (( obj context -- obj' )) }
{ "(set-context-and-delete)" "threads.private" (( obj context -- * )) }
{ "(start-context)" "threads.private" (( obj quot -- obj' )) }
{ "<string>" "strings" "primitive_string" (( n ch -- string )) }
{ "resize-string" "strings" "primitive_resize_string" (( n str -- newstr )) }
{ "set-string-nth-fast" "strings.private" "primitive_set_string_nth_fast" (( ch n string -- )) }
- { "set-string-nth-slow" "strings.private" "primitive_set_string_nth_slow" (( ch n string -- )) }
- { "string-nth" "strings.private" "primitive_string_nth" (( n string -- ch )) }
{ "(exit)" "system" "primitive_exit" (( n -- * )) }
{ "nano-count" "system" "primitive_nano_count" (( -- ns )) }
{ "system-micros" "system" "primitive_system_micros" (( -- us )) }
"s" get >array
] unit-test
+! Make sure string initialization works
+[ HEX: 123456 ] [ 100 HEX: 123456 <string> first ] unit-test
+
! Make sure we clear aux vector when storing octets
[ "\u123456hi" ] [ "ih\u123456" clone reverse! ] unit-test
-! Copyright (C) 2003, 2008 Slava Pestov.
+! Copyright (C) 2003, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors kernel math.private sequences kernel.private
-math sequences.private slots.private alien.accessors ;
+USING: accessors alien.accessors byte-arrays kernel math.private
+sequences kernel.private math sequences.private slots.private ;
IN: strings
<PRIVATE
: rehash-string ( str -- )
1 over sequence-hashcode swap set-string-hashcode ; inline
+: (aux) ( n string -- byte-array m )
+ aux>> { byte-array } declare swap 1 fixnum-shift-fast ; inline
+
+: small-char? ( ch -- ? ) HEX: 7f fixnum<= ; inline
+
+: string-nth ( n string -- ch )
+ 2dup string-nth-fast dup small-char?
+ [ 2nip ] [
+ [ (aux) alien-unsigned-2 7 fixnum-shift-fast ] dip
+ fixnum-bitxor
+ ] if ; inline
+
+: ensure-aux ( string -- string )
+ dup aux>> [ dup length 2 * (byte-array) >>aux ] unless ; inline
+
+: set-string-nth-slow ( ch n string -- )
+ [ [ HEX: 80 fixnum-bitor ] 2dip set-string-nth-fast ]
+ [
+ ensure-aux
+ [ -7 fixnum-shift-fast 1 fixnum-bitxor ] 2dip
+ (aux) set-alien-unsigned-2
+ ] 3bi ;
+
: set-string-nth ( ch n string -- )
- pick HEX: 7f fixnum<=
+ pick small-char?
[ set-string-nth-fast ] [ set-string-nth-slow ] if ; inline
PRIVATE>
std::ostream &operator<<(std::ostream &out, const string *str)
{
for(cell i = 0; i < string_capacity(str); i++)
- out << (char)str->nth(i);
+ out << (char)str->data()[i];
return out;
}
cell hashcode;
u8 *data() const { return (u8 *)(this + 1); }
-
- cell nth(cell i) const;
};
struct code_block;
_(set_slot) \
_(set_special_object) \
_(set_string_nth_fast) \
- _(set_string_nth_slow) \
_(size) \
_(sleep) \
_(special_object) \
_(string) \
- _(string_nth) \
_(strip_stack_traces) \
_(system_micros) \
_(tuple) \
namespace factor
{
-cell string::nth(cell index) const
-{
- /* If high bit is set, the most significant 16 bits of the char
- come from the aux vector. The least significant bit of the
- corresponding aux vector entry is negated, so that we can
- XOR the two components together and get the original code point
- back. */
- cell lo_bits = data()[index];
-
- if((lo_bits & 0x80) == 0)
- return lo_bits;
- else
- {
- byte_array *aux = untag<byte_array>(this->aux);
- cell hi_bits = aux->data<u16>()[index];
- return (hi_bits << 7) ^ lo_bits;
- }
-}
-
-void factor_vm::set_string_nth_fast(string *str, cell index, cell ch)
-{
- str->data()[index] = (u8)ch;
-}
-
-void factor_vm::set_string_nth_slow(string *str_, cell index, cell ch)
-{
- data_root<string> str(str_,this);
-
- byte_array *aux;
-
- str->data()[index] = ((ch & 0x7f) | 0x80);
-
- if(to_boolean(str->aux))
- aux = untag<byte_array>(str->aux);
- else
- {
- /* We don't need to pre-initialize the
- byte array with any data, since we
- only ever read from the aux vector
- if the most significant bit of a
- character is set. Initially all of
- the bits are clear. */
- aux = allot_uninitialized_array<byte_array>(untag_fixnum(str->length) * sizeof(u16));
-
- str->aux = tag<byte_array>(aux);
- write_barrier(&str->aux);
- }
-
- aux->data<u16>()[index] = (u16)((ch >> 7) ^ 1);
-}
-
-/* allocates memory */
-void factor_vm::set_string_nth(string *str, cell index, cell ch)
-{
- if(ch <= 0x7f)
- set_string_nth_fast(str,index,ch);
- else
- set_string_nth_slow(str,index,ch);
-}
-
/* Allocates memory */
string *factor_vm::allot_string_internal(cell capacity)
{
data_root<string> str(str_,this);
if(fill <= 0x7f)
- memset(&str->data()[start],(int)fill,capacity - start);
+ memset(&str->data()[start],(u8)fill,capacity - start);
else
{
- cell i;
+ byte_array *aux;
+ if(to_boolean(str->aux))
+ aux = untag<byte_array>(str->aux);
+ else
+ {
+ aux = allot_uninitialized_array<byte_array>(untag_fixnum(str->length) * 2);
+ str->aux = tag<byte_array>(aux);
+ write_barrier(&str->aux);
+ }
- for(i = start; i < capacity; i++)
- set_string_nth(str.untagged(),i,fill);
+ u8 lo_fill = (u8)((fill & 0x7f) | 0x80);
+ u16 hi_fill = (u16)((fill >> 7) ^ 0x1);
+ memset(&str->data()[start],lo_fill,capacity - start);
+ memset_2(&aux->data<u16>()[start],hi_fill,(capacity - start) * sizeof(u16));
}
}
if(to_boolean(str->aux))
{
- byte_array *new_aux = allot_byte_array(capacity * sizeof(u16));
-
+ byte_array *new_aux = allot_uninitialized_array<byte_array>(capacity * 2);
new_str->aux = tag<byte_array>(new_aux);
write_barrier(&new_str->aux);
ctx->push(tag<string>(reallot_string(str.untagged(),capacity)));
}
-void factor_vm::primitive_string_nth()
-{
- string *str = untag<string>(ctx->pop());
- cell index = untag_fixnum(ctx->pop());
- ctx->push(tag_fixnum(str->nth(index)));
-}
-
void factor_vm::primitive_set_string_nth_fast()
{
string *str = untag<string>(ctx->pop());
cell index = untag_fixnum(ctx->pop());
cell value = untag_fixnum(ctx->pop());
- set_string_nth_fast(str,index,value);
-}
-
-void factor_vm::primitive_set_string_nth_slow()
-{
- string *str = untag<string>(ctx->pop());
- cell index = untag_fixnum(ctx->pop());
- cell value = untag_fixnum(ctx->pop());
- set_string_nth_slow(str,index,value);
+ str->data()[index] = (u8)value;
}
}
namespace factor
{
+inline static void memset_2(void *dst, u16 pattern, size_t size)
+{
+#ifdef __APPLE__
+ cell cell_pattern = (pattern | (pattern << 16));
+ memset_pattern4(dst,&cell_pattern,size);
+#else
+ if(pattern == 0)
+ memset(dst,0,size);
+ else
+ {
+ u16 *start = (u16 *)dst;
+ u16 *end = (u16 *)((cell)dst + size);
+ while(start < end)
+ {
+ *start = pattern;
+ start++;
+ }
+ }
+#endif
+}
+
inline static void memset_cell(void *dst, cell pattern, size_t size)
{
#ifdef __APPLE__
cell std_vector_to_array(std::vector<cell> &elements);
// strings
- cell string_nth(const string *str, cell index);
- void set_string_nth_fast(string *str, cell index, cell ch);
- void set_string_nth_slow(string *str_, cell index, cell ch);
- void set_string_nth(string *str, cell index, cell ch);
string *allot_string_internal(cell capacity);
void fill_string(string *str_, cell start, cell capacity, cell fill);
string *allot_string(cell capacity, cell fill);
bool reallot_string_in_place_p(string *str, cell capacity);
string* reallot_string(string *str_, cell capacity);
void primitive_resize_string();
- void primitive_string_nth();
void primitive_set_string_nth_fast();
- void primitive_set_string_nth_slow();
// booleans
cell tag_boolean(cell untagged)