: pad-bytes ( seq -- newseq )
dup length bootstrap-cell align 0 pad-right ;
+: check-string ( string -- )
+ [ 127 > ] contains?
+ [ "Bootstrap cannot emit non-ASCII strings" throw ] when ;
+
: emit-string ( string -- ptr )
+ dup check-string
string type-number object tag-number [
dup length emit-fixnum
f ' emit
M: ##slot defs-vregs dst/tmp-vregs ;
M: ##set-slot defs-vregs temp>> 1array ;
M: ##string-nth defs-vregs dst/tmp-vregs ;
+M: ##set-string-nth-fast defs-vregs temp>> 1array ;
M: ##compare defs-vregs dst/tmp-vregs ;
M: ##compare-imm defs-vregs dst/tmp-vregs ;
M: ##compare-float defs-vregs dst/tmp-vregs ;
M: ##set-slot uses-vregs [ src>> ] [ obj>> ] [ slot>> ] tri 3array ;
M: ##set-slot-imm uses-vregs [ src>> ] [ obj>> ] bi 2array ;
M: ##string-nth uses-vregs [ obj>> ] [ index>> ] bi 2array ;
+M: ##set-string-nth-fast uses-vregs [ src>> ] [ obj>> ] [ index>> ] tri 3array ;
M: ##conditional-branch uses-vregs [ src1>> ] [ src2>> ] bi 2array ;
M: ##compare-imm-branch uses-vregs src1>> 1array ;
M: ##dispatch uses-vregs src>> 1array ;
! String element access
INSN: ##string-nth < ##flushable { obj vreg } { index vreg } { temp vreg } ;
+INSN: ##set-string-nth-fast < ##effect { obj vreg } { index vreg } { temp vreg } ;
! Integer arithmetic
INSN: ##add < ##commutative ;
slots.private:slot
slots.private:set-slot
strings.private:string-nth
+ strings.private:set-string-nth-fast
classes.tuple.private:<tuple-boa>
arrays:<array>
byte-arrays:<byte-array>
{ \ slots.private:slot [ emit-slot iterate-next ] }
{ \ slots.private:set-slot [ emit-set-slot iterate-next ] }
{ \ strings.private:string-nth [ drop emit-string-nth iterate-next ] }
+ { \ strings.private:set-string-nth-fast [ drop emit-set-string-nth-fast iterate-next ] }
{ \ classes.tuple.private:<tuple-boa> [ emit-<tuple-boa> iterate-next ] }
{ \ arrays:<array> [ emit-<array> iterate-next ] }
{ \ byte-arrays:<byte-array> [ emit-<byte-array> iterate-next ] }
: emit-string-nth ( -- )
2inputs swap ^^untag-fixnum ^^string-nth ^^tag-fixnum ds-push ;
+
+: emit-set-string-nth-fast ( -- )
+ 3inputs [ ^^untag-fixnum ] [ ^^untag-fixnum ] [ ] tri*
+ swap i ##set-string-nth-fast ;
[ temp>> register ]
} cleave %string-nth ;
+M: ##set-string-nth-fast generate-insn
+ {
+ [ src>> register ]
+ [ obj>> register ]
+ [ index>> register ]
+ [ temp>> register ]
+ } cleave %set-string-nth-fast ;
+
: dst/src ( insn -- dst src )
[ dst>> register ] [ src>> register ] bi ; inline
HOOK: %set-slot-imm cpu ( src obj slot tag -- )
HOOK: %string-nth cpu ( dst obj index temp -- )
+HOOK: %set-string-nth-fast cpu ( ch obj index temp -- )
HOOK: %add cpu ( dst src1 src2 -- )
HOOK: %add-imm cpu ( dst src1 src2 -- )
M:: x86 %string-nth ( dst src index temp -- )
"end" define-label
dst { src index temp } [| new-dst |
+ ! Load the least significant 7 bits into new-dst.
+ ! 8th bit indicates whether we have to load from
+ ! the aux vector or not.
temp src index [+] LEA
new-dst 1 small-reg temp string-offset [+] MOV
new-dst new-dst 1 small-reg MOVZX
+ ! Do we have to look at the aux vector?
+ new-dst HEX: 80 CMP
+ "end" get JL
+ ! Yes, this is a non-ASCII character. Load aux vector
temp src string-aux-offset [+] MOV
- temp \ f tag-number CMP
- "end" get JE
new-dst temp XCHG
+ ! Compute index
new-dst index ADD
new-dst index ADD
+ ! Load high 16 bits
new-dst 2 small-reg new-dst byte-array-offset [+] MOV
new-dst new-dst 2 small-reg MOVZX
- new-dst 8 SHL
- new-dst temp OR
+ new-dst 7 SHL
+ ! Compute code point
+ new-dst temp XOR
"end" resolve-label
dst new-dst ?MOV
] with-small-register ;
+M:: x86 %set-string-nth-fast ( ch str index temp -- )
+ ch { index str } [| new-ch |
+ new-ch ch ?MOV
+ temp str index [+] LEA
+ temp string-offset [+] new-ch 1 small-reg MOV
+ ] with-small-register ;
+
:: %alien-integer-getter ( dst src size quot -- )
dst { src } [| new-dst |
new-dst dup size small-reg dup src [] MOV
\ string-nth { fixnum string } { fixnum } define-primitive
\ string-nth make-flushable
-\ set-string-nth { fixnum fixnum string } { } define-primitive
+\ set-string-nth-slow { fixnum fixnum string } { } define-primitive
+\ set-string-nth-fast { fixnum fixnum string } { } define-primitive
\ resize-array { integer array } { array } define-primitive
\ resize-array make-flushable
{ "alien-address" "alien" }
{ "set-slot" "slots.private" }
{ "string-nth" "strings.private" }
- { "set-string-nth" "strings.private" }
+ { "set-string-nth-fast" "strings.private" }
+ { "set-string-nth-slow" "strings.private" }
{ "resize-array" "arrays" }
{ "resize-string" "strings" }
{ "<array>" "arrays" }
: rehash-string ( str -- )
1 over sequence-hashcode swap set-string-hashcode ; inline
+: set-string-nth ( ch n str -- )
+ pick HEX: 7f fixnum<=
+ [ set-string-nth-fast ] [ set-string-nth-slow ] if ; inline
+
PRIVATE>
M: string equal?
] if ;
M: string hashcode*
- nip dup string-hashcode [ ]
- [ dup rehash-string string-hashcode ] ?if ;
+ nip
+ dup string-hashcode
+ [ ] [ dup rehash-string string-hashcode ] ?if ;
M: string length
length>> ;
M: string set-nth-unsafe
dup reset-string-hashcode
- [ [ >fixnum ] dip >fixnum ] dip set-string-nth ;
+ [ >fixnum ] [ >fixnum ] [ ] tri* set-string-nth ;
M: string clone
(clone) [ clone ] change-aux ;
primitive_alien_address,
primitive_set_slot,
primitive_string_nth,
- primitive_set_string_nth,
+ primitive_set_string_nth_fast,
+ primitive_set_string_nth_slow,
primitive_resize_array,
primitive_resize_string,
primitive_array,
/* Strings */
CELL string_nth(F_STRING* string, CELL index)
{
+ /* If high bit is set, the most significant 16 bits of the char
+ come from the aux vector. The least significant bit of the
+ corresponding aux vector entry is negated, so that we can
+ XOR the two components together and get the original code point
+ back. */
CELL ch = bget(SREF(string,index));
- if(string->aux == F)
+ if((ch & 0x80) == 0)
return ch;
else
{
F_BYTE_ARRAY *aux = untag_object(string->aux);
- return (cget(BREF(aux,index * sizeof(u16))) << 8) | ch;
+ return (cget(BREF(aux,index * sizeof(u16))) << 7) ^ ch;
}
}
-/* allocates memory */
-void set_string_nth(F_STRING* string, CELL index, CELL value)
+void set_string_nth_fast(F_STRING* string, CELL index, CELL ch)
{
- bput(SREF(string,index),value & 0xff);
+ bput(SREF(string,index),ch);
+}
+void set_string_nth_slow(F_STRING* string, CELL index, CELL ch)
+{
F_BYTE_ARRAY *aux;
+ bput(SREF(string,index),(ch & 0x7f) | 0x80);
+
if(string->aux == F)
{
- if(value <= 0xff)
- return;
- else
- {
- REGISTER_UNTAGGED(string);
- aux = allot_byte_array(
- untag_fixnum_fast(string->length)
- * sizeof(u16));
- UNREGISTER_UNTAGGED(string);
+ REGISTER_UNTAGGED(string);
+ /* We don't need to pre-initialize the
+ byte array with any data, since we
+ only ever read from the aux vector
+ if the most significant bit of a
+ character is set. Initially all of
+ the bits are clear. */
+ aux = allot_byte_array_internal(
+ untag_fixnum_fast(string->length)
+ * sizeof(u16));
+ UNREGISTER_UNTAGGED(string);
- write_barrier((CELL)string);
- string->aux = tag_object(aux);
- }
+ write_barrier((CELL)string);
+ string->aux = tag_object(aux);
}
else
aux = untag_object(string->aux);
- cput(BREF(aux,index * sizeof(u16)),value >> 8);
+ cput(BREF(aux,index * sizeof(u16)),(ch >> 7) ^ 1);
+}
+
+/* allocates memory */
+void set_string_nth(F_STRING* string, CELL index, CELL ch)
+{
+ if(ch <= 0x7f)
+ set_string_nth_fast(string,index,ch);
+ else
+ set_string_nth_slow(string,index,ch);
}
/* untagged */
/* allocates memory */
void fill_string(F_STRING *string, CELL start, CELL capacity, CELL fill)
{
- if(fill == 0)
- {
- memset((void *)SREF(string,start),'\0',capacity - start);
-
- if(string->aux != F)
- {
- F_BYTE_ARRAY *aux = untag_object(string->aux);
- memset((void *)BREF(aux,start * sizeof(u16)),'\0',
- (capacity - start) * sizeof(u16));
- }
- }
+ if(fill <= 0x7f)
+ memset((void *)SREF(string,start),fill,capacity - start);
else
{
CELL i;
CELL value = untag_fixnum_fast(dpop());
set_string_nth(string,index,value);
}
+
+void primitive_set_string_nth_fast(void)
+{
+ F_STRING *string = untag_object(dpop());
+ CELL index = untag_fixnum_fast(dpop());
+ CELL value = untag_fixnum_fast(dpop());
+ set_string_nth_fast(string,index,value);
+}
+
+void primitive_set_string_nth_slow(void)
+{
+ F_STRING *string = untag_object(dpop());
+ CELL index = untag_fixnum_fast(dpop());
+ CELL value = untag_fixnum_fast(dpop());
+ set_string_nth_slow(string,index,value);
+}
void set_string_nth(F_STRING* string, CELL index, CELL value);
void primitive_string_nth(void);
-void primitive_set_string_nth(void);
+void primitive_set_string_nth_slow(void);
+void primitive_set_string_nth_fast(void);
F_WORD *allot_word(CELL vocab, CELL name);
void primitive_word(void);