]> gitweb.factorcode.org Git - factor.git/blob - vmpp/strings.cpp
a69e7dd3c7c357f975f2528c8a12c14c64699476
[factor.git] / vmpp / strings.cpp
1 #include "master.hpp"
2
3 CELL string_nth(F_STRING* string, CELL index)
4 {
5         /* If high bit is set, the most significant 16 bits of the char
6         come from the aux vector. The least significant bit of the
7         corresponding aux vector entry is negated, so that we can
8         XOR the two components together and get the original code point
9         back. */
10         CELL ch = bget(SREF(string,index));
11         if((ch & 0x80) == 0)
12                 return ch;
13         else
14         {
15                 F_BYTE_ARRAY *aux = untag_byte_array_fast(string->aux);
16                 return (cget(BREF(aux,index * sizeof(u16))) << 7) ^ ch;
17         }
18 }
19
20 void set_string_nth_fast(F_STRING *string, CELL index, CELL ch)
21 {
22         bput(SREF(string,index),ch);
23 }
24
25 void set_string_nth_slow(F_STRING *string_, CELL index, CELL ch)
26 {
27         gc_root<F_STRING> string(string_);
28
29         F_BYTE_ARRAY *aux;
30
31         bput(SREF(string.untagged(),index),(ch & 0x7f) | 0x80);
32
33         if(string->aux == F)
34         {
35                 /* We don't need to pre-initialize the
36                 byte array with any data, since we
37                 only ever read from the aux vector
38                 if the most significant bit of a
39                 character is set. Initially all of
40                 the bits are clear. */
41                 aux = allot_array_internal<F_BYTE_ARRAY>(
42                         untag_fixnum_fast(string->length)
43                         * sizeof(u16));
44
45                 write_barrier(string.value());
46                 string->aux = tag_object(aux);
47         }
48         else
49                 aux = untag_byte_array_fast(string->aux);
50
51         cput(BREF(aux,index * sizeof(u16)),(ch >> 7) ^ 1);
52 }
53
54 /* allocates memory */
55 void set_string_nth(F_STRING* string, CELL index, CELL ch)
56 {
57         if(ch <= 0x7f)
58                 set_string_nth_fast(string,index,ch);
59         else
60                 set_string_nth_slow(string,index,ch);
61 }
62
63 /* Allocates memory */
64 F_STRING *allot_string_internal(CELL capacity)
65 {
66         F_STRING *string = allot<F_STRING>(string_size(capacity));
67
68         string->length = tag_fixnum(capacity);
69         string->hashcode = F;
70         string->aux = F;
71
72         return string;
73 }
74
75 /* Allocates memory */
76 void fill_string(F_STRING *string_, CELL start, CELL capacity, CELL fill)
77 {
78         gc_root<F_STRING> string(string_);
79
80         if(fill <= 0x7f)
81                 memset((void *)SREF(string.untagged(),start),fill,capacity - start);
82         else
83         {
84                 CELL i;
85
86                 for(i = start; i < capacity; i++)
87                         set_string_nth(string.untagged(),i,fill);
88         }
89 }
90
91 /* Allocates memory */
92 F_STRING *allot_string(CELL capacity, CELL fill)
93 {
94         gc_root<F_STRING> string(allot_string_internal(capacity));
95         fill_string(string.untagged(),0,capacity,fill);
96         return string.untagged();
97 }
98
99 void primitive_string(void)
100 {
101         CELL initial = to_cell(dpop());
102         CELL length = unbox_array_size();
103         dpush(tag_object(allot_string(length,initial)));
104 }
105
106 static bool reallot_string_in_place_p(F_STRING *string, CELL capacity)
107 {
108         return in_zone(&nursery,(CELL)string) && capacity <= string_capacity(string);
109 }
110
111 F_STRING* reallot_string(F_STRING *string_, CELL capacity)
112 {
113         gc_root<F_STRING> string(string_);
114
115         if(reallot_string_in_place_p(string.untagged(),capacity))
116         {
117                 string->length = tag_fixnum(capacity);
118
119                 if(string->aux != F)
120                 {
121                         F_BYTE_ARRAY *aux = untag_byte_array_fast(string->aux);
122                         aux->capacity = tag_fixnum(capacity * 2);
123                 }
124
125                 return string.untagged();
126         }
127         else
128         {
129                 CELL to_copy = string_capacity(string.untagged());
130                 if(capacity < to_copy)
131                         to_copy = capacity;
132
133                 gc_root<F_STRING> new_string(allot_string_internal(capacity));
134
135                 memcpy(new_string.untagged() + 1,string.untagged() + 1,to_copy);
136
137                 if(string->aux != F)
138                 {
139                         F_BYTE_ARRAY *new_aux = allot_byte_array(capacity * sizeof(u16));
140
141                         write_barrier(new_string.value());
142                         new_string->aux = tag_object(new_aux);
143
144                         F_BYTE_ARRAY *aux = untag_byte_array_fast(string->aux);
145                         memcpy(new_aux + 1,aux + 1,to_copy * sizeof(u16));
146                 }
147
148                 fill_string(new_string.untagged(),to_copy,capacity,'\0');
149                 return new_string.untagged();
150         }
151 }
152
153 void primitive_resize_string(void)
154 {
155         F_STRING* string = untag_string(dpop());
156         CELL capacity = unbox_array_size();
157         dpush(tag_object(reallot_string(string,capacity)));
158 }
159
160 /* Some ugly macros to prevent a 2x code duplication */
161
162 #define MEMORY_TO_STRING(type,utype) \
163         F_STRING *memory_to_##type##_string(const type *string, CELL length) \
164         { \
165                 REGISTER_C_STRING(string); \
166                 gc_root<F_STRING> s(allot_string_internal(length)); \
167                 UNREGISTER_C_STRING(type,string); \
168                 CELL i; \
169                 for(i = 0; i < length; i++) \
170                 { \
171                         set_string_nth(s.untagged(),i,(utype)*string);  \
172                         string++; \
173                 } \
174                 return s.untagged(); \
175         } \
176         F_STRING *from_##type##_string(const type *str) \
177         { \
178                 CELL length = 0; \
179                 const type *scan = str; \
180                 while(*scan++) length++; \
181                 return memory_to_##type##_string(str,length); \
182         } \
183         void box_##type##_string(const type *str) \
184         { \
185                 dpush(str ? tag_object(from_##type##_string(str)) : F); \
186         }
187
188 MEMORY_TO_STRING(char,u8)
189 MEMORY_TO_STRING(u16,u16)
190 MEMORY_TO_STRING(u32,u32)
191
192 bool check_string(F_STRING *s, CELL max)
193 {
194         CELL capacity = string_capacity(s);
195         CELL i;
196         for(i = 0; i < capacity; i++)
197         {
198                 CELL ch = string_nth(s,i);
199                 if(ch == 0 || ch >= ((CELL)1 << (max * 8)))
200                         return false;
201         }
202         return true;
203 }
204
205 F_BYTE_ARRAY *allot_c_string(CELL capacity, CELL size)
206 {
207         return allot_byte_array((capacity + 1) * size);
208 }
209
210 #define STRING_TO_MEMORY(type) \
211         void type##_string_to_memory(F_STRING *s, type *string) \
212         { \
213                 CELL i; \
214                 CELL capacity = string_capacity(s); \
215                 for(i = 0; i < capacity; i++) \
216                         string[i] = string_nth(s,i); \
217         } \
218         void primitive_##type##_string_to_memory(void) \
219         { \
220                 type *address = (type *)unbox_alien();  \
221                 F_STRING *str = untag_string(dpop()); \
222                 type##_string_to_memory(str,address); \
223         } \
224         F_BYTE_ARRAY *string_to_##type##_alien(F_STRING *s_, bool check) \
225         { \
226                 gc_root<F_STRING> s(s_); \
227                 CELL capacity = string_capacity(s.untagged());  \
228                 F_BYTE_ARRAY *_c_str; \
229                 if(check && !check_string(s.untagged(),sizeof(type)))   \
230                         general_error(ERROR_C_STRING,s.value(),F,NULL); \
231                 _c_str = allot_c_string(capacity,sizeof(type)); \
232                 type *c_str = (type*)(_c_str + 1); \
233                 type##_string_to_memory(s.untagged(),c_str);    \
234                 c_str[capacity] = 0; \
235                 return _c_str; \
236         } \
237         type *to_##type##_string(F_STRING *s, bool check) \
238         { \
239                 return (type*)(string_to_##type##_alien(s,check) + 1); \
240         } \
241         type *unbox_##type##_string(void) \
242         { \
243                 return to_##type##_string(untag_string(dpop()),true); \
244         }
245
246 STRING_TO_MEMORY(char);
247 STRING_TO_MEMORY(u16);
248
249 void primitive_string_nth(void)
250 {
251         F_STRING *string = untag_string_fast(dpop());
252         CELL index = untag_fixnum_fast(dpop());
253         dpush(tag_fixnum(string_nth(string,index)));
254 }
255
256 void primitive_set_string_nth(void)
257 {
258         F_STRING *string = untag_string_fast(dpop());
259         CELL index = untag_fixnum_fast(dpop());
260         CELL value = untag_fixnum_fast(dpop());
261         set_string_nth(string,index,value);
262 }
263
264 void primitive_set_string_nth_fast(void)
265 {
266         F_STRING *string = untag_string_fast(dpop());
267         CELL index = untag_fixnum_fast(dpop());
268         CELL value = untag_fixnum_fast(dpop());
269         set_string_nth_fast(string,index,value);
270 }
271
272 void primitive_set_string_nth_slow(void)
273 {
274         F_STRING *string = untag_string_fast(dpop());
275         CELL index = untag_fixnum_fast(dpop());
276         CELL value = untag_fixnum_fast(dpop());
277         set_string_nth_slow(string,index,value);
278 }