]> gitweb.factorcode.org Git - factor.git/blob - basis/regexp/regexp-tests.factor
xmode.marker: caching match group regexps for performance
[factor.git] / basis / regexp / regexp-tests.factor
1 USING: arrays regexp tools.test kernel sequences regexp.parser
2 regexp.private eval strings multiline accessors ;
3 IN: regexp.tests
4 ! Kleene star: a* accepts the empty string and runs of "a", nothing else.
5 { f } [ "b" "a*" <regexp> matches? ] unit-test
6 { t } [ "" "a*" <regexp> matches? ] unit-test
7 { t } [ "a" "a*" <regexp> matches? ] unit-test
8 { t } [ "aaaaaaa" "a*"  <regexp> matches? ] unit-test
9 { f } [ "ab" "a*" <regexp> matches? ] unit-test
10 ! Literal sequences and alternation of single characters.
11 { t } [ "abc" "abc" <regexp> matches? ] unit-test
12 { t } [ "a" "a|b|c" <regexp> matches? ] unit-test
13 { t } [ "b" "a|b|c" <regexp> matches? ] unit-test
14 { t } [ "c" "a|b|c" <regexp> matches? ] unit-test
15 { f } [ "c" "d|e|f" <regexp> matches? ] unit-test
16 ! Empty alternatives: an empty branch on either side of | must accept "".
17 { t } [ "b" "|b" <regexp> matches? ] unit-test
18 { t } [ "b" "b|" <regexp> matches? ] unit-test
19 { t } [ "" "b|" <regexp> matches? ] unit-test
20 { t } [ "" "|b" <regexp> matches? ] unit-test
21 { t } [ "" "|" <regexp> matches? ] unit-test
22 { t } [ "" "|||||||" <regexp> matches? ] unit-test
23 ! An alternation of single characters must reject two-character input.
24 { f } [ "aa" "a|b|c" <regexp> matches? ] unit-test
25 { f } [ "bb" "a|b|c" <regexp> matches? ] unit-test
26 { f } [ "cc" "a|b|c" <regexp> matches? ] unit-test
27 { f } [ "cc" "d|e|f" <regexp> matches? ] unit-test
28 ! One-or-more (+).
29 { f } [ "" "a+" <regexp> matches? ] unit-test
30 { t } [ "a" "a+" <regexp> matches? ] unit-test
31 { t } [ "aa" "a+" <regexp> matches? ] unit-test
32 ! Zero-or-one (?).
33 { t } [ "" "a?" <regexp> matches? ] unit-test
34 { t } [ "a" "a?" <regexp> matches? ] unit-test
35 { f } [ "aa" "a?" <regexp> matches? ] unit-test
36 ! Dot consumes exactly one character (a letter, or a literal ".").
37 { f } [ "" "." <regexp> matches? ] unit-test
38 { t } [ "a" "." <regexp> matches? ] unit-test
39 { t } [ "." "." <regexp> matches? ] unit-test
40
41 ! Dotall mode: when enabled via (?s:...) or the s option, . also matches "\n".
42 ! It is off by default, so a bare . (including one outside the (?s:) group) rejects "\n".
43 { f } [ "\n" "." <regexp> matches? ] unit-test
44 { t } [ "\n" "(?s:.)" <regexp> matches? ] unit-test
45 { t } [ "\n" R/ ./s matches? ] unit-test
46 { f } [ "\n\n" "(?s:.)." <regexp> matches? ] unit-test
47 ! .+ requires at least one character.
48 { f } [ "" ".+" <regexp> matches? ] unit-test
49 { t } [ "a" ".+" <regexp> matches? ] unit-test
50 { t } [ "ab" ".+" <regexp> matches? ] unit-test
51 ! \0 inside a class denotes the NUL character, not the digit "0".
52 { t } [ "\0" "[\\0]" <regexp> matches? ] unit-test
53 { f } [ "0" "[\\0]" <regexp> matches? ] unit-test
54 ! Shorthand classes \s \S \w \W used inside brackets.
55 { t } [ " " "[\\s]" <regexp> matches? ] unit-test
56 { f } [ "a" "[\\s]" <regexp> matches? ] unit-test
57 { f } [ " " "[\\S]" <regexp> matches? ] unit-test
58 { t } [ "a" "[\\S]" <regexp> matches? ] unit-test
59 { f } [ " " "[\\w]" <regexp> matches? ] unit-test
60 { t } [ "a" "[\\w]" <regexp> matches? ] unit-test
61 { t } [ " " "[\\W]" <regexp> matches? ] unit-test
62 { f } [ "a" "[\\W]" <regexp> matches? ] unit-test
63 ! An escaped slash matches a literal "/".
64 { t } [ "/" "\\/" <regexp> matches? ] unit-test
65 ! R/ literal carrying the i option.
66 { t } [ "a" R/ a/i matches? ] unit-test
67 ! Alternation whose branches carry different quantifiers.
68 { t } [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
69 { t } [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
70 { t } [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
71 { t } [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
72 { f } [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
73 { t } [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
74 ! Alternation of multi-character literals; the concatenation of both is rejected.
75 { t } [ "foo" "foo|bar" <regexp> matches? ] unit-test
76 { t } [ "bar" "foo|bar" <regexp> matches? ] unit-test
77 { f } [ "foobar" "foo|bar" <regexp> matches? ] unit-test
78 ! Parenthesised groups.
79 { f } [ "" "(a)" <regexp> matches? ] unit-test
80 { t } [ "a" "(a)" <regexp> matches? ] unit-test
81 { f } [ "aa" "(a)" <regexp> matches? ] unit-test
82 { t } [ "aa" "(a*)" <regexp> matches? ] unit-test
83 ! A quantified group containing an alternation; the trailing "c" makes the first input fail.
84 { f } [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
85 { t } [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
86 ! {n}: exactly n repetitions.
87 { f } [ "" "a{1}" <regexp> matches? ] unit-test
88 { t } [ "a" "a{1}" <regexp> matches? ] unit-test
89 { f } [ "aa" "a{1}" <regexp> matches? ] unit-test
90 ! {n,}: at least n repetitions.
91 { f } [ "a" "a{2,}" <regexp> matches? ] unit-test
92 { t } [ "aaa" "a{2,}" <regexp> matches? ] unit-test
93 { t } [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
94 { t } [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
95 ! {,m}: at most m repetitions, zero included.
96 { t } [ "" "a{,2}" <regexp> matches? ] unit-test
97 { t } [ "a" "a{,2}" <regexp> matches? ] unit-test
98 { t } [ "aa" "a{,2}" <regexp> matches? ] unit-test
99 { f } [ "aaa" "a{,2}" <regexp> matches? ] unit-test
100 { f } [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
101 { f } [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
102 ! {n,m}: between n and m repetitions, inclusive.
103 { f } [ "" "a{1,3}" <regexp> matches? ] unit-test
104 { t } [ "a" "a{1,3}" <regexp> matches? ] unit-test
105 { t } [ "aa" "a{1,3}" <regexp> matches? ] unit-test
106 { t } [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
107 { f } [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
108 ! Character classes, alone and with a counted repetition.
109 { f } [ "" "[a]" <regexp> matches? ] unit-test
110 { t } [ "a" "[a]" <regexp> matches? ] unit-test
111 { t } [ "a" "[abc]" <regexp> matches? ] unit-test
112 { f } [ "b" "[a]" <regexp> matches? ] unit-test
113 { f } [ "d" "[abc]" <regexp> matches? ] unit-test
114 { t } [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
115 { f } [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
116 ! Negated character classes; a negated class still needs one character.
117 { f } [ "" "[^a]" <regexp> matches? ] unit-test
118 { f } [ "a" "[^a]" <regexp> matches? ] unit-test
119 { f } [ "a" "[^abc]" <regexp> matches? ] unit-test
120 { t } [ "b" "[^a]" <regexp> matches? ] unit-test
121 { t } [ "d" "[^abc]" <regexp> matches? ] unit-test
122 { f } [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
123 { f } [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
124 ! A "]" placed first in a class (after an optional ^) is a literal member.
125 { t } [ "]" "[]]" <regexp> matches? ] unit-test
126 { f } [ "]" "[^]]" <regexp> matches? ] unit-test
127 { t } [ "a" "[^]]" <regexp> matches? ] unit-test
128 ! "[^]" is expected to throw (must-fail); "[]^]" contains both "]" and "^".
129 [ "^" "[^]" <regexp> matches? ] must-fail
130 { t } [ "^" "[]^]" <regexp> matches? ] unit-test
131 { t } [ "]" "[]^]" <regexp> matches? ] unit-test
132 ! "[" and "^" as ordinary class members.
133 { t } [ "[" "[[]" <regexp> matches? ] unit-test
134 { f } [ "^" "[^^]" <regexp> matches? ] unit-test
135 { t } [ "a" "[^^]" <regexp> matches? ] unit-test
136 ! "-" alone in a class is a literal hyphen.
137 { t } [ "-" "[-]" <regexp> matches? ] unit-test
138 { f } [ "a" "[-]" <regexp> matches? ] unit-test
139 { f } [ "-" "[^-]" <regexp> matches? ] unit-test
140 { t } [ "a" "[^-]" <regexp> matches? ] unit-test
141 ! A leading or trailing "-" is literal rather than a range operator.
142 { t } [ "-" "[-a]" <regexp> matches? ] unit-test
143 { t } [ "a" "[-a]" <regexp> matches? ] unit-test
144 { t } [ "-" "[a-]" <regexp> matches? ] unit-test
145 { t } [ "a" "[a-]" <regexp> matches? ] unit-test
146 { f } [ "b" "[a-]" <regexp> matches? ] unit-test
147 { f } [ "-" "[^-]" <regexp> matches? ] unit-test
148 { t } [ "a" "[^-]" <regexp> matches? ] unit-test
149 ! Ranges: "-" is outside a-c, "b" is inside; negation flips both.
150 { f } [ "-" "[a-c]" <regexp> matches? ] unit-test
151 { t } [ "-" "[^a-c]" <regexp> matches? ] unit-test
152 { t } [ "b" "[a-c]" <regexp> matches? ] unit-test
153 { f } [ "b" "[^a-c]" <regexp> matches? ] unit-test
154 ! A trailing "-" after a range is literal.
155 { t } [ "-" "[a-c-]" <regexp> matches? ] unit-test
156 { f } [ "-" "[^a-c-]" <regexp> matches? ] unit-test
157 ! Escaped backslash inside a class.
158 { t } [ "\\" "[\\\\]" <regexp> matches? ] unit-test
159 { f } [ "a" "[\\\\]" <regexp> matches? ] unit-test
160 { f } [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
161 { t } [ "a" "[^\\\\]" <regexp> matches? ] unit-test
162 ! \d inside a class and inside a negated class.
163 { t } [ "0" "[\\d]" <regexp> matches? ] unit-test
164 { f } [ "a" "[\\d]" <regexp> matches? ] unit-test
165 { f } [ "0" "[^\\d]" <regexp> matches? ] unit-test
166 { t } [ "a" "[^\\d]" <regexp> matches? ] unit-test
167 ! Alternations mixing ranges, counted repetition and groups.
168 { t } [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
169 { t } [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
170 { t } [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
171 ! Counted repetition applied to \d and to an explicit digit range.
172 { t } [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
173 { t } [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
174 ! \p{Lower} and \p{Upper}, bare and inside brackets.
175 { t } [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
176 { f } [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
177 { t } [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
178 { f } [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
179 { f } [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
180 { t } [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
181 ! \Q...\E quotes its contents, so | * + are matched literally.
182 { t } [ "" "\\Q\\E" <regexp> matches? ] unit-test
183 { f } [ "a" "\\Q\\E" <regexp> matches? ] unit-test
184 { t } [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
185 { f } [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
186 { t } [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
187 ! Octal (\0123 = "S"), hexadecimal (\x78 = "x") and \u0078 character escapes.
188 { t } [ "S" "\\0123" <regexp> matches? ] unit-test
189 { t } [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
190 { t } [ "x" "\\x78" <regexp> matches? ] unit-test
191 { f } [ "y" "\\x78" <regexp> matches? ] unit-test
192 { t } [ "x" "\\u0078" <regexp> matches? ] unit-test
193 { f } [ "y" "\\u0078" <regexp> matches? ] unit-test
194 ! A literal following a quantified atom.
195 { t } [ "ab" "a+b" <regexp> matches? ] unit-test
196 { f } [ "b" "a+b" <regexp> matches? ] unit-test
197 { t } [ "aab" "a+b" <regexp> matches? ] unit-test
198 { f } [ "abb" "a+b" <regexp> matches? ] unit-test
199 ! Star applies only to the immediately preceding atom ("abab" is rejected).
200 { t } [ "abbbb" "ab*" <regexp> matches? ] unit-test
201 { t } [ "a" "ab*" <regexp> matches? ] unit-test
202 { f } [ "abab" "ab*" <regexp> matches? ] unit-test
203 ! An escaped dot matches only a literal ".".
204 { f } [ "x" "\\." <regexp> matches? ] unit-test
205 { t } [ "." "\\." <regexp> matches? ] unit-test
206 ! a+ followed by further literals.
207 { t } [ "aaaab" "a+ab" <regexp> matches? ] unit-test
208 { f } [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
209 { t } [ "aaacb" "a+cb" <regexp> matches? ] unit-test
210 ! first-match: the expected results are the longest matches at the start of the input.
211 { "aaa" } [ "aaacb" "a*" <regexp> first-match >string ] unit-test
212 { "aa" } [ "aaacb" "aa?" <regexp> first-match >string ] unit-test
213 ! Case-insensitive matching via the i option on R/ literals.
214 { t } [ "aaa" R/ AAA/i matches? ] unit-test
215 { f } [ "aax" R/ AAA/i matches? ] unit-test
216 { t } [ "aaa" R/ A*/i matches? ] unit-test
217 { f } [ "aaba" R/ A*/i matches? ] unit-test
218 { t } [ "b" R/ [AB]/i matches? ] unit-test
219 { f } [ "c" R/ [AB]/i matches? ] unit-test
220 { t } [ "c" R/ [A-Z]/i matches? ] unit-test
221 { f } [ "3" R/ [A-Z]/i matches? ] unit-test
222 ! Inline (?i:...) group. NOTE(review): each assertion below is listed twice.
223 { t } [ "a" "(?i:a)" <regexp> matches? ] unit-test
224 { t } [ "a" "(?i:a)" <regexp> matches? ] unit-test
225 { t } [ "A" "(?i:a)" <regexp> matches? ] unit-test
226 { t } [ "A" "(?i:a)" <regexp> matches? ] unit-test
227 ! (?-i:...) turns case-insensitivity back off inside an i regexp (also listed twice).
228 { t } [ "a" R/ (?-i:a)/i matches? ] unit-test
229 { t } [ "a" R/ (?-i:a)/i matches? ] unit-test
230 { f } [ "A" R/ (?-i:a)/i matches? ] unit-test
231 { f } [ "A" R/ (?-i:a)/i matches? ] unit-test
232 ! The i option applied to a range.
233 { f } [ "A" "[a-z]" <regexp> matches? ] unit-test
234 { t } [ "A" R/ [a-z]/i matches? ] unit-test
235 ! The i option applied to \p{Lower}.
236 { f } [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
237 { t } [ "A" R/ \p{Lower}/i matches? ] unit-test
238 ! The r option (presumably "reversed" matching -- confirm against the regexp docs).
239 { t } [ "abc" R/ abc/r matches? ] unit-test
240 { t } [ "abc" R/ a[bB][cC]/r matches? ] unit-test
241 ! match-index-from with r regexps, starting at index 3; only truthiness is checked.
242 { t } [ 3 "xabc" R/ abc/r match-index-from >boolean ] unit-test
243 { t } [ 3 "xabc" R/ a[bB][cC]/r match-index-from >boolean ] unit-test
244 ! match-index-from returns an index, for a forward regexp and for an r regexp.
245 { 2 } [ 0 "llamallol" R/ ll/ match-index-from ] unit-test
246 { 5 } [ 8 "lolmallol" R/ lol/r match-index-from ] unit-test
247 ! Classes containing "." and a trailing "-", and an escaped dot before a class.
248 { t } [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
249 { f } [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
250 { t } [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
251 ! A starred negated class followed by the excluded character.
252 { t } [ "abc*" "[^\\*]*\\*" <regexp> matches? ] unit-test
253 { t } [ "bca" "[^a]*a" <regexp> matches? ] unit-test
254 ! Long alternation (looks like a numeric-literal pattern): only construction via <regexp> is checked.
255 { } [
256     "(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
257     <regexp> drop
258 ] unit-test
259 ! \p{...} properties inside brackets within an alternation (construction only).
260 { } [ "(\\$[\\p{XDigit}]|[\\p{Digit}])" <regexp> drop ] unit-test
261
262 ! (?#...) is a comment inside a regular expression and matches nothing.
263 { t } [ "ac" "a(?#boo)c" <regexp> matches? ] unit-test
264 ! R/ literals parsed through eval( -- ); only successful parsing is checked.
265 { } [ "USING: regexp kernel ; R/ -{3}[+]{1,6}(?:!!)?\\s/ drop" eval( -- ) ] unit-test
266
267 { } [ "USING: regexp kernel ; R/ (ftp|http|https):\\/\\/(\\w+:?\\w*@)?(\\S+)(:[0-9]+)?(\\/\\|\\/([\\w#!:.?+=&%@!\\-\\/]))?/ drop" eval( -- ) ] unit-test
268 ! NOTE(review): the string below uses a single-backslash "\s", unlike the "\\s" above -- confirm it is intended.
269 { } [ "USING: regexp kernel ; R/ \\*[^\s*][^*]*\\*/ drop" eval( -- ) ] unit-test
270 ! first-match with an optional trailing group: both branch orders give the same result.
271 { "ab" } [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
272 { "abc" } [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
273
274 { "ab" } [ "ab" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
275 { "abc" } [ "abc" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
276 ! Nested stars: the expected first match is just "b".
277 { "b" } [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
278 ! With the r option, first-match is expected to be the slice over the last "hello".
279 { T{ slice { from 5 } { to 10 } { seq "hellohello" } } }
280 [ "hellohello" R/ hello/r first-match ]
281 unit-test
282 ! re-split returns the pieces between matches; a match at the end yields a final "".
283 { { "1" "2" "3" "4" } }
284 [ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
285
286 { { "1" "2" "3" "4" "" } }
287 [ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
288 ! Empty input gives a single empty piece.
289 { { "" } } [ "" R/ =/ re-split [ >string ] map ] unit-test
290
291 { { "a" "" } } [ "a=" R/ =/ re-split [ >string ] map ] unit-test
292
293 { { "he" "o" } } [ "hello" R/ l+/ re-split [ >string ] map ] unit-test
294
295 { { "h" "llo" } } [ "hello" R/ e+/ re-split [ >string ] map ] unit-test
296 ! e* can match the empty string, so empty matches also act as separators.
297 { { "" "h" "" "l" "l" "o" "" } } [ "hello" R/ e*/ re-split [ >string ] map ] unit-test
298 ! map-matches hands three values to the quotation (3array collects them): from, to, and the string.
299 { { { 0 5 "hellohello" } { 5 10 "hellohello" } } }
300 [ "hellohello" R/ hello/ [ 3array ] map-matches ]
301 unit-test
302 ! With the r option the matches are reported last to first.
303 { { { 5 10 "hellohello" } { 0 5 "hellohello" } } }
304 [ "hellohello" R/ hello/r [ 3array ] map-matches ]
305 unit-test
306 ! all-matching-subseqs collects the matched substrings.
307 { { "ABC" "DEF" "GHI" } }
308 [ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matching-subseqs ] unit-test
309 ! Forward and r variants return the same matches in opposite order.
310 { { "ee" "e" } } [ "heellohello" R/ e+/ all-matching-subseqs ] unit-test
311 { { "e" "ee" } } [ "heellohello" R/ e+/r all-matching-subseqs ] unit-test
312 ! count-matches; most cases are checked both forward and with the r option.
313 { 3 } [ "1ABC2DEF3GHI4" R/ [A-Z]+/ count-matches ] unit-test
314
315 { 3 } [ "1ABC2DEF3GHI4" R/ [A-Z]+/r count-matches ] unit-test
316 ! An empty regexp on empty input counts one (empty) match.
317 { 1 } [ "" R/ / count-matches ] unit-test
318
319 { 1 } [ "" R/ /r count-matches ] unit-test
320
321 { 0 } [ "123" R/ [A-Z]+/ count-matches ] unit-test
322
323 { 0 } [ "123" R/ [A-Z]+/r count-matches ] unit-test
324 ! Patterns that can match the empty string.
325 { 6 } [ "hello" R/ e*/ count-matches ] unit-test
326
327 { 6 } [ "hello" R/ e*/r count-matches ] unit-test
328
329 { 11 } [ "hello world" R/ l*/ count-matches ] unit-test
330
331 { 11 } [ "hello world" R/ l*/r count-matches ] unit-test
332 ! Patterns that need at least one character.
333 { 1 } [ "hello" R/ e+/ count-matches ] unit-test
334
335 { 2 } [ "hello world" R/ l+/r count-matches ] unit-test
336 ! re-replace substitutes every match with the given string (empty matches included).
337 { "1.2.3.4." } [ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test
338 { "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R/ e*/ "X" re-replace ] unit-test
339 { "-- title --" } [ "== title ==" R/ =/ "-" re-replace ] unit-test
340 ! Patterns containing "/" and "\", built from a string and written as R/ literals.
341 { "abc" } [ "a/   \\bc" "/.*\\" <regexp> "" re-replace ] unit-test
342 { "ac" } [ "a/   \\bc" R/ \/.*\\./ "" re-replace ] unit-test
343 { "abc" } [ "a/   \\bc" R/ \/.*\\/ "" re-replace ] unit-test
344 ! Negative lookahead (?!...), positive lookahead (?=...) and lookbehind (?<=...).
345 { "" } [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
346 { "a" } [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
347 { t } [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
348 { t } [ "foobar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
349 { t } [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
350 { f } [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
351 { "a" } [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
352 { "a" } [ "ba" "(?<=b)(?<=b)a" <regexp> first-match >string ] unit-test
353 { "a" } [ "cab" "(?<=c)a(?=b)" <regexp> first-match >string ] unit-test
354 ! The lookahead text is not part of the match (length 3); no match yields f.
355 { 3 } [ "foobar" "foo(?=bar)" <regexp> first-match length ] unit-test
356 { f } [ "foobxr" "foo(?=bar)" <regexp> first-match ] unit-test
357
358 ! Regression test for a bug in the parsing word (exercised here through an R/ literal).
359 { t } [ "a" R/ a/ matches? ] unit-test
360
361 ! Negation: (?~x) is expected to match exactly the inputs that x does not match.
362 { f } [ "a" R/ (?~a)/ matches? ] unit-test
363 { t } [ "aa" R/ (?~a)/ matches? ] unit-test
364 { t } [ "bb" R/ (?~a)/ matches? ] unit-test
365 { t } [ "" R/ (?~a)/ matches? ] unit-test
366 ! Negation of an alternation.
367 { f } [ "a" R/ (?~a+|b)/ matches? ] unit-test
368 { f } [ "aa" R/ (?~a+|b)/ matches? ] unit-test
369 { t } [ "bb" R/ (?~a+|b)/ matches? ] unit-test
370 { f } [ "b" R/ (?~a+|b)/ matches? ] unit-test
371 { t } [ "" R/ (?~a+|b)/ matches? ] unit-test
372
373 ! Intersecting classes: alternation branches whose leading character sets overlap.
374 { t } [ "ab" R/ ac|\p{Lower}b/ matches? ] unit-test
375 { t } [ "ab" R/ ac|[a-z]b/ matches? ] unit-test
376 { t } [ "ac" R/ ac|\p{Lower}b/ matches? ] unit-test
377 { t } [ "ac" R/ ac|[a-z]b/ matches? ] unit-test
378 { t } [ "ac" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
379 { t } [ "ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
380 { t } [ "πb" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
381 { f } [ "πc" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
382 { f } [ "Ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
383 ! Dot overlapping with a literal it is adjacent to.
384 { t } [ "aaaa" R/ .*a./ matches? ] unit-test
385 ! The same overlapping alternations under (?~...): every expectation above is inverted.
386 { f } [ "ab" R/ (?~ac|\p{Lower}b)/ matches? ] unit-test
387 { f } [ "ab" R/ (?~ac|[a-z]b)/ matches? ] unit-test
388 { f } [ "ac" R/ (?~ac|\p{Lower}b)/ matches? ] unit-test
389 { f } [ "ac" R/ (?~ac|[a-z]b)/ matches? ] unit-test
390 { f } [ "ac" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
391 { f } [ "ab" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
392 { f } [ "πb" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
393 { t } [ "πc" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
394 { t } [ "Ab" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
395
396 ! The DFA is compiled on demand for <regexp> (dfa>> still holds regexp-initial-word) but already for R/ literals.
397 { regexp-initial-word } [ "foo" <regexp> dfa>> ] unit-test
398 { f } [ R/ foo/ dfa>> \ regexp-initial-word = ] unit-test
399 ! Without the m option, ^ matches only at the very start of the input.
400 { t } [ "a" R/ ^a/ matches? ] unit-test
401 { f } [ "\na" R/ ^a/ matches? ] unit-test
402 { f } [ "\r\na" R/ ^a/ matches? ] unit-test
403 { f } [ "\ra" R/ ^a/ matches? ] unit-test
404
405 { 1 } [ "a" R/ ^a/ count-matches ] unit-test
406 { 0 } [ "\na" R/ ^a/ count-matches ] unit-test
407 { 0 } [ "\r\na" R/ ^a/ count-matches ] unit-test
408 { 0 } [ "\ra" R/ ^a/ count-matches ] unit-test
409 ! Without the m option, $ matches only at the very end of the input.
410 { t } [ "a" R/ a$/ matches? ] unit-test
411 { f } [ "a\n" R/ a$/ matches? ] unit-test
412 { f } [ "a\r" R/ a$/ matches? ] unit-test
413 { f } [ "a\r\n" R/ a$/ matches? ] unit-test
414
415 { 1 } [ "a" R/ a$/ count-matches ] unit-test
416 { 0 } [ "a\n" R/ a$/ count-matches ] unit-test
417 { 0 } [ "a\r" R/ a$/ count-matches ] unit-test
418 { 0 } [ "a\r\n" R/ a$/ count-matches ] unit-test
419 ! $ inside alternation branches.
420 { t } [ "a" R/ a$|b$/ matches? ] unit-test
421 { t } [ "b" R/ a$|b$/ matches? ] unit-test
422 { f } [ "ab" R/ a$|b$/ matches? ] unit-test
423 { t } [ "ba" R/ ba$|b$/ matches? ] unit-test
424 ! \A anchors at the start of the input only; a leading line terminator defeats it.
425 { t } [ "a" R/ \Aa/ matches? ] unit-test
426 { f } [ "\na" R/ \Aa/ matches? ] unit-test
427 { f } [ "\r\na" R/ \Aa/ matches? ] unit-test
428 { f } [ "\ra" R/ \Aa/ matches? ] unit-test
429 ! \A is unaffected by the m option.
430 { t } [ "a" R/ \Aa/m matches? ] unit-test
431 { f } [ "\na" R/ \Aa/m matches? ] unit-test
432 { f } [ "\r\na" R/ \Aa/m matches? ] unit-test
433 { f } [ "\ra" R/ \Aa/m matches? ] unit-test
434 { 0 } [ "\ra" R/ \Aa/m count-matches ] unit-test
435 ! With m, ^ matches after line terminators: matches? (whole input) fails, yet one match is counted.
436 { f } [ "\r\n\n\n\nam" R/ ^am/m matches? ] unit-test
437 { 1 } [ "\r\n\n\n\nam" R/ ^am/m count-matches ] unit-test
438 ! \z with the m option: a trailing "\n" makes matches? fail.
439 { t } [ "a" R/ \Aa\z/m matches? ] unit-test
440 { f } [ "a\n" R/ \Aa\z/m matches? ] unit-test
441 ! \Z with m: matches? rejects input ending in a line terminator, yet count-matches finds the "a".
442 { f } [ "a\r\n" R/ \Aa\Z/m matches? ] unit-test
443 { f } [ "a\n" R/ \Aa\Z/m matches? ] unit-test
444 { 1 } [ "a\r\n" R/ \Aa\Z/m count-matches ] unit-test
445 { 1 } [ "a\n" R/ \Aa\Z/m count-matches ] unit-test
446 ! \A...\Z with the m option: a leading line terminator prevents any match.
447 { t } [ "a" R/ \Aa\Z/m matches? ] unit-test
448 { f } [ "\na" R/ \Aa\Z/m matches? ] unit-test
449 { f } [ "\r\na" R/ \Aa\Z/m matches? ] unit-test
450 { f } [ "\ra" R/ \Aa\Z/m matches? ] unit-test
451
452 { 1 } [ "a" R/ \Aa\Z/m count-matches ] unit-test
453 { 0 } [ "\na" R/ \Aa\Z/m count-matches ] unit-test
454 { 0 } [ "\r\na" R/ \Aa\Z/m count-matches ] unit-test
455 { 0 } [ "\ra" R/ \Aa\Z/m count-matches ] unit-test
456 ! With the m option, ^ also matches right after "\n", "\r\n" or "\r".
457 { t } [ "a" R/ ^a/m matches? ] unit-test
458 { f } [ "\na" R/ ^a/m matches? ] unit-test
459 { 1 } [ "\na" R/ ^a/m count-matches ] unit-test
460 { 1 } [ "\r\na" R/ ^a/m count-matches ] unit-test
461 { 1 } [ "\ra" R/ ^a/m count-matches ] unit-test
462 ! With the m option, $ also matches right before "\n", "\r" or "\r\n".
463 { t } [ "a" R/ a$/m matches? ] unit-test
464 { f } [ "a\n" R/ a$/m matches? ] unit-test
465 { 1 } [ "a\n" R/ a$/m count-matches ] unit-test
466 { 1 } [ "a\r" R/ a$/m count-matches ] unit-test
467 { 1 } [ "a\r\n" R/ a$/m count-matches ] unit-test
468 ! \z: end of input.
469 { f } [ "foobxr" "foo\\z" <regexp> first-match ] unit-test
470 { 3 } [ "foo" "foo\\z" <regexp> first-match length ] unit-test
471 ! re-contains? succeeds when the regexp matches anywhere in the input.
472 { t } [ "a foo b" R/ foo/ re-contains? ] unit-test
473 { f } [ "a bar b" R/ foo/ re-contains? ] unit-test
474 { t } [ "foo" R/ foo/ re-contains? ] unit-test
475 ! With the r option, all-matching-subseqs lists the matches last to first.
476 { { "foo" "fxx" "fab" } } [ "fab fxx foo" R/ f../r all-matching-subseqs ] unit-test
477 ! Word boundary \b and non-boundary \B on both sides of a literal.
478 { t } [ "foo" "\\bfoo\\b" <regexp> re-contains? ] unit-test
479 { t } [ "afoob" "\\Bfoo\\B" <regexp> re-contains? ] unit-test
480 { f } [ "afoob" "\\bfoo\\b" <regexp> re-contains? ] unit-test
481 { f } [ "foo" "\\Bfoo\\B" <regexp> re-contains? ] unit-test
482 ! \b after a literal: satisfied before a space or at end of input, not before a letter.
483 { 3 } [ "foo bar" "foo\\b" <regexp> first-match length ] unit-test
484 { f } [ "fooxbar" "foo\\b" <regexp> re-contains? ] unit-test
485 { t } [ "foo" "foo\\b" <regexp> re-contains? ] unit-test
486 { t } [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
487 { f } [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
488 { f } [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
489 ! \B after a literal: the mirror image of the \b cases above.
490 { f } [ "foo bar" "foo\\B" <regexp> re-contains? ] unit-test
491 { 3 } [ "fooxbar" "foo\\B" <regexp> first-match length ] unit-test
492 { f } [ "foo" "foo\\B" <regexp> re-contains? ] unit-test
493 { f } [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
494 { t } [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
495 { f } [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
496 ! Lookahead with a starred body. NOTE(review): the first and last assertions here are identical.
497 { t } [ "ab" "a(?=b*)" <regexp> re-contains? ] unit-test
498 { t } [ "abbbbbc" "a(?=b*c)" <regexp> re-contains? ] unit-test
499 { f } [ "abbbbb" "a(?=b*c)" <regexp> re-contains? ] unit-test
500 { t } [ "ab" "a(?=b*)" <regexp> re-contains? ] unit-test
501 ! Positive lookbehind (?<=...).
502 { "az" } [ "baz" "(?<=b)(az)" <regexp> first-match >string ] unit-test
503 { f } [ "chaz" "(?<=b)(az)" <regexp> re-contains? ] unit-test
504 { "a" } [ "cbaz" "(?<=b*)a" <regexp> first-match >string ] unit-test
505 { f } [ "baz" "a(?<=b)" <regexp> re-contains? ] unit-test
506 ! Negative lookbehind (?<!...).
507 { f } [ "baz" "(?<!b)a" <regexp> re-contains? ] unit-test
508 { t } [ "caz" "(?<!b)a" <regexp> re-contains? ] unit-test
509 ! Lookahead (?=...), comment group (?#...) and non-capturing group (?:...) side by side.
510 { "abcd" } [ "abcdefg" "a(?=bcdefg)bcd" <regexp> first-match >string ] unit-test
511 { t } [ "abcdefg" "a(?#bcdefg)bcd" <regexp> re-contains? ] unit-test
512 { t } [ "abcdefg" "a(?:bcdefg)" <regexp> matches? ] unit-test
513 ! first-match yields a slice; from>> is the start index of the matched "a".
514 { 3 } [ "caba" "(?<=b)a" <regexp> first-match from>> ] unit-test
515 ! ms vs mds options: with d, "\r" is expected not to start a new line for ^, while "\n" still does.
516 { t } [ "\ra" R/ .^a/ms matches? ] unit-test
517 { f } [ "\ra" R/ .^a/mds matches? ] unit-test
518 { t } [ "\na" R/ .^a/ms matches? ] unit-test
519 { t } [ "\na" R/ .^a/mds matches? ] unit-test
520 ! The same comparison for $.
521 { t } [ "a\r" R/ a$./ms matches? ] unit-test
522 { f } [ "a\r" R/ a$./mds matches? ] unit-test
523 { t } [ "a\n" R/ a$./ms matches? ] unit-test
524 { t } [ "a\n" R/ a$./mds matches? ] unit-test
525
526 ! Unicode categories: \p{X} matches members of X, \P{X} matches non-members.
527 { t } [ "a" R/ \p{L}/ matches? ] unit-test
528 { t } [ "A" R/ \p{L}/ matches? ] unit-test
529 { f } [ " " R/ \p{L}/ matches? ] unit-test
530 { f } [ "a" R/ \P{L}/ matches? ] unit-test
531 { f } [ "A" R/ \P{L}/ matches? ] unit-test
532 { t } [ " " R/ \P{L}/ matches? ] unit-test
533 ! Two-letter category Ll: "a" is inside it, "A" and " " are outside.
534 { t } [ "a" R/ \p{Ll}/ matches? ] unit-test
535 { f } [ "A" R/ \p{Ll}/ matches? ] unit-test
536 { f } [ " " R/ \p{Ll}/ matches? ] unit-test
537 { f } [ "a" R/ \P{Ll}/ matches? ] unit-test
538 { t } [ "A" R/ \P{Ll}/ matches? ] unit-test
539 { t } [ " " R/ \P{Ll}/ matches? ] unit-test
540 ! Script property.
541 { t } [ "a" R/ \p{script=Latin}/ matches? ] unit-test
542 { f } [ " " R/ \p{script=Latin}/ matches? ] unit-test
543 { f } [ "a" R/ \P{script=Latin}/ matches? ] unit-test
544 { t } [ " " R/ \P{script=Latin}/ matches? ] unit-test
545
546 ! Property names should be case-insensitive; the last two also put spaces around "=".
547 { f } [ " " R/ \p{l}/ matches? ] unit-test
548 { f } [ "a" R/ \P{l}/ matches? ] unit-test
549 { f } [ "a" R/ \P{ll}/ matches? ] unit-test
550 { t } [ " " R/ \P{LL}/ matches? ] unit-test
551 { f } [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
552 { t } [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
553
554 ! Logical operators inside classes; plain juxtaposition gives the same results as || below.
555 { t } [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
556 { t } [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
557 { t } [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
558 { f } [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
559 ! || : union.
560 { t } [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
561 { t } [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
562 { t } [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
563 { f } [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
564 ! && : intersection.
565 { t } [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
566 { f } [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
567 { f } [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
568 { f } [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
569 ! ~~ : symmetric difference (in exactly one of the two classes).
570 { f } [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
571 { t } [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
572 { t } [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
573 { f } [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
574 ! -- : difference (in the left class but not in the right one).
575 { f } [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
576 { f } [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
577 { t } [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
578 { f } [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
579 ! \P{alpha} matches exactly one non-alphabetic character.
580 { t } [ " " R/ \P{alpha}/ matches? ] unit-test
581 { f } [ "" R/ \P{alpha}/ matches? ] unit-test
582 { f } [ "a " R/ \P{alpha}/ matches? ] unit-test
583 { f } [ "a" R/ \P{alpha}/ matches? ] unit-test