]> gitweb.factorcode.org Git - factor.git/blob - extra/pcre/pcre-tests.factor
pcre: ensure we aren't using 16 or 32 bit pcre because that's not tested
[factor.git] / extra / pcre / pcre-tests.factor
1 USING:
2     accessors
3     arrays
4     assocs
5     http.client
6     kernel
7     math math.ranges
8     pcre pcre.ffi pcre.info
9     random
10     sequences
11     system
12     tools.test ;
13 QUALIFIED: regexp
14 IN: pcre.tests
15
16 CONSTANT: iso-date "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
17
18 ! On Windows the erroffset appears to be set to 0 despite there being
19 ! nothing wrong with the regexp, so both -1 and 0 are accepted here.
20 [ t ] [
21     "foo" (pcre) 3array 1 tail { { f -1 } { f 0 } } member?
22 ] unit-test
23
24 [ { 1 2 3 } ] [ ! each group name should map to its number: 1, 2, 3
25     iso-date <pcre>
26     { "year" "month" "day" } [ pcre_get_stringnumber ] with map
27 ] unit-test
28
29 [ t ] [ "foo" <compiled-pcre> PCRE_UTF8 has-option? ] unit-test
30
31 os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE config ] unit-test ] when
32
33 ! In this day and age, not supporting utf-8 is broken.
34 [ 1 ] [ PCRE_CONFIG_UTF8 config ] unit-test
35
36 [ 1 ] [ PCRE_CONFIG_UNICODE_PROPERTIES config ] unit-test
37
38 ! Only 8 bit libpcre is tested: the UTF16 and UTF32 config values must be 0.
39 [ 0 ] [ PCRE_CONFIG_UTF16 config ] unit-test
40 [ 0 ] [ PCRE_CONFIG_UTF32 config ] unit-test
41
42 ! Tests for findall: a match is { name text } pairs, f naming the whole match.
43 [
44     { { f "1999-01-12" } { "year" "1999" } { "month" "01" } { "day" "12" } }
45 ] [
46     "1999-01-12" iso-date <compiled-pcre> findall first
47 ] unit-test
48
49 [ 3 ] [
50     "2003-10-09 1999-09-01 1514-10-20" iso-date <compiled-pcre> findall length
51 ] unit-test
52
53 [ 5 ] [ "abcdef" "[a-e]" findall length ] unit-test
54
55 [ 3 ] [ "foo bar baz" "foo|bar|baz" findall length ] unit-test
56
57 [ 3 ] [ "örjan är åtta" "[åäö]" findall length ] unit-test
58
59 [ 3 ] [ "ÅÄÖ" "\\p{Lu}" findall length ] unit-test
60 ! The lookahead (?=bar) is not part of the matched text, hence length 3.
61 [ 3 ] [ "foobar" "foo(?=bar)" findall first first second length ] unit-test
62
63 [ { { { f ", " } } { { f ", " } } { { f "." } } } ] [
64     "Words, words, word." "\\W+" findall
65 ] unit-test
66
67 [ { ", " ", " "." } ] [
68     "Words, words, word." "\\W+" findall [ first second ] map
69 ] unit-test
70
71 : long-string ( -- x ) ! x is a 10000-character string of random letters a-z
72     10000 [ CHAR: a CHAR: z [a,b] random ] "" replicate-as ;
73
74 ! Performance: the input is random, so this assumes "foobar" never occurs in it.
75 [ 0 ] [ long-string ".{0,15}foobar.{0,10}" findall length ] unit-test
76
77 ! Empty matches, corner case behaviour is copied from pcredemo.c
78 [ { { { f "foo" } } { { f "" } } } ]
79 [ "foo" ".*" findall ] unit-test
80
81 [ { { { f "" } } { { f "" } } { { f "" } } } ]
82 [ "foo" "B*" findall ] unit-test
83
84 ! Empty matches with multi-byte characters are tricky: one match per character.
85 [ { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } ]
86 [ "öööö" "x*" findall ] unit-test
87
88 ! Tests for matches?
89 [ t ] [ "örjan" "örjan" matches? ] unit-test
90
91 [ t ] [ "abcö" "\\p{Ll}{4}" matches? ] unit-test
92
93 ! Dotall mode, off by default; the inline (?s) flag turns it on
94 [ f ] [ "." <compiled-pcre> PCRE_DOTALL has-option? ] unit-test
95 [ t ] [ "(?s)." <compiled-pcre> PCRE_DOTALL has-option? ] unit-test
96 ! Without dotall, "." is expected not to match a newline.
97 [ f ] [ "\n" "." matches? ] unit-test
98 [ t ] [ "\n" "(?s)." matches? ] unit-test
99
100 ! Caseless mode, off by default; the inline (?i) flag turns it on
101 [ { f t } ] [
102     { "x" "(?i)x" } [ <compiled-pcre> PCRE_CASELESS has-option? ] map
103 ] unit-test
104
105 ! Backreferences: \1 must repeat the text the group actually captured
106 [ { t f } ] [
107     { "response and responsibility" "sense and responsibility" }
108     [ "(sens|respons)e and \\1ibility" matches? ] map
109 ] unit-test
110 ! The caseless group matches any case, but \1 must repeat it exactly.
111 [ { t t f } ] [
112     { "rah rah" "RAH RAH" "RAH rah" } [ "((?i)rah)\\s+\\1" matches? ] map
113 ] unit-test
114
115 ! Splitting: both patterns should give the same words, with no empty pieces
116 [ { { "Words" "words" "word" } { "Words" "words" "word" } } ] [
117     "Words, words, word." { "\\W+" "[,. ]" } [ split ] with map
118 ] unit-test
119
120 ! Bigger tests. NOTE(review): needs network access, and sequence? looks always t
121 [ t ] [ ! for the result of map, so this seems to check only that nothing throws
122     "http://factorcode.org/" http-get nip
123     "href=\"(?P<link>[^\"]+)\"" findall [ "link" of ] map sequence?
124 ] unit-test
125
126 ! Test that the regexp syntax works.
127 [ t ] [ "1234abcd" regexp:R[ ^\d+\w+$] matches? ] unit-test