]> gitweb.factorcode.org Git - factor.git/blob - extra/pcre/pcre-tests.factor
Harmonize spelling
[factor.git] / extra / pcre / pcre-tests.factor
1 USING: accessors arrays assocs continuations http.client kernel
2 literals math math.parser ranges pcre pcre.ffi pcre.private
3 random sequences system tools.test ;
4 QUALIFIED: regexp
5 QUALIFIED: splitting
6 IN: pcre.tests
7
! split-subseqs is expected to return only the pieces of the string that
! lie between the listed separators.
8 { { "Bords" "words" "word" } } [
9     "Bords, words, word." { ", " ", " "." } split-subseqs
10 ] unit-test
11
! The nametable slot of a compiled pattern is expected to hold one
! { group-number name } pair per named group.
12 { { { 3 "day" } { 2 "month" } { 1 "year" } } } [
13     "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
14     <compiled-pcre> nametable>>
15 ] unit-test
16
! Date pattern with named year, month and day groups, shared by later tests.
17 CONSTANT: iso-date "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
18
19 ! On Windows the erroffset appears to be set to 0 despite there being
20 ! nothing wrong with the regexp, so both -1 and 0 are accepted below.
! The last two outputs of (pcre) -- presumably the error message and the
! error offset -- must be f together with either -1 or 0.
21 { t } [
22     "foo" (pcre) 3array rest { { f -1 } { f 0 } } member?
23 ] unit-test
24
! Each named group of iso-date should map to its group number.
25 { { 1 2 3 } } [
26     iso-date <pcre>
27     { "year" "month" "day" } [ pcre_get_stringnumber ] with map
28 ] unit-test
29
! A pattern built with <compiled-pcre> is expected to have PCRE_UTF8 set.
30 { t } [
31     "foo" <compiled-pcre> PCRE_UTF8 has-option?
32 ] unit-test
33
34 ! This option is not present on old PCRE versions.
! PCRE_UCP must be set when version is greater than 8.10, unset otherwise.
35 { t } [
36     "foo" <compiled-pcre> version 8.10 >
37     [ PCRE_UCP has-option? ] [ PCRE_UCP has-option? not ] if
38 ] unit-test
39
! Only checked on Unix: the configured newline value is expected to be 10.
40 os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE pcre-config ] unit-test ] when
41
42 ! In this day and age, not supporting utf-8 is broken.
43 { 1 } [ PCRE_CONFIG_UTF8 pcre-config ] unit-test
44
! Unicode property support is expected to be reported as 1 as well.
45 { 1 } [ PCRE_CONFIG_UNICODE_PROPERTIES pcre-config ] unit-test
46
47 ! Ok if these options throw if the pcre library is too old to support
48 ! these configuration parameters.
! Two outcomes are accepted for each query: it returns 0, or it throws and
! the what>> slot of the error equals the queried constant.
49 { t } [
50     [ PCRE_CONFIG_UTF16 pcre-config ] [ what>> ] recover
51     { 0 $ PCRE_CONFIG_UTF16 } member?
52 ] unit-test
53 { t } [
54     [ PCRE_CONFIG_UTF32 pcre-config ] [ what>> ] recover
55     { 0 $ PCRE_CONFIG_UTF32 } member?
56 ] unit-test
57
! Expected to yield 33 -- presumably pcre-fullinfo rejects this request code
! and the error's what>> slot echoes it back (TODO confirm against pcre.factor).
58 { 33 }
59 [
60     [ "foo" <pcre> f 33 pcre-fullinfo ] [ what>> ] recover
61 ] unit-test
62
63 ! Tests for findall
! Each match is a sequence of { name text } pairs; the whole match comes
! first with f as its name, followed by the named groups.
64 {
65     { { f "1999-01-12" } { "year" "1999" } { "month" "01" } { "day" "12" } }
66 } [
67     "1999-01-12" iso-date <compiled-pcre> findall first
68 ] unit-test
69
! findall accepts a compiled pattern (above) as well as a pattern string.
70 { 3 } [
71     "2003-10-09 1999-09-01 1514-10-20" iso-date <compiled-pcre> findall length
72 ] unit-test
73
74 { 5 } [ "abcdef" "[a-e]" findall length ] unit-test
75
76 { 3 } [ "foo bar baz" "foo|bar|baz" findall length ] unit-test
77
! Non-ASCII subjects: character classes and Unicode properties.
78 { 3 } [ "örjan är åtta" "[åäö]" findall length ] unit-test
79
80 { 3 } [ "ÅÄÖ" "\\p{Lu}" findall length ] unit-test
81
! The lookahead (?=bar) must not be included in the matched text.
82 { 3 } [ "foobar" "foo(?=bar)" findall first first second length ] unit-test
83
84 { { { { f ", " } } { { f ", " } } { { f "." } } } } [
85     "Words, words, word." "\\W+" findall
86 ] unit-test
87
! Same search, reduced to the matched text of each hit.
88 { { ", " ", " "." } } [
89     "Words, words, word." "\\W+" findall [ first second ] map
90 ] unit-test
91
! Builds a 10000-element string whose elements are drawn at random from
! the range CHAR: a .. CHAR: z. The range is built once and curried into
! the quotation that replicate-as calls for every element.
92 : long-string ( -- x )
93     10000 CHAR: a CHAR: z [a..b] [ random ] curry "" replicate-as ;
94
95 ! Performance
! NOTE: long-string is random, so this expects that "foobar" never shows
! up in it; a chance occurrence is possible but very unlikely.
96 { 0 } [ long-string ".{0,15}foobar.{0,10}" findall length ] unit-test
97
98 ! Empty matches, corner case behavior is copied from pcredemo.c
! ".*" is expected to match "foo" first and then once more with empty text.
99 { { { { f "foo" } } { { f "" } } } }
100 [ "foo" ".*" findall ] unit-test
101
! A pattern that only matches emptily: three empty matches are expected
! for the three-character subject.
102 { { { { f "" } } { { f "" } } { { f "" } } } }
103 [ "foo" "B*" findall ] unit-test
104
105 ! Empty matches in strings with multi-byte characters are tricky.
! Four empty matches are expected for the four-character subject.
106 { { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } }
107 [ "öööö" "x*" findall ] unit-test
108
109 ! Tests for matches?
110 { t } [ "örjan" "örjan" matches? ] unit-test
111
! Four lowercase letters, one of them non-ASCII.
112 { t } [ "abcö" "\\p{Ll}{4}" matches? ] unit-test
113
114 ! This used to work in 8.36, but might have changed in later versions.
115 ! See: https://bugs.exim.org/show_bug.cgi?id=1875
! Only run when version is at most 8.36: inline (?s) and (?i) flags are
! expected to show up as PCRE_DOTALL and PCRE_CASELESS options.
116 version 8.36 <= [
117     { t t } [
118         "(?s)." <compiled-pcre> PCRE_DOTALL has-option?
119         "(?i)x" <compiled-pcre> PCRE_CASELESS has-option?
120     ] unit-test
121 ] when
122
! "." must not match a newline unless (?s) is given.
123 { f } [ "\n" "." matches? ] unit-test
124 { t } [ "\n" "(?s)." matches? ] unit-test
125
! Same check on a two-line subject, using compiled patterns.
126 { f t } [
127     "hello\nthere" "^.*$" <compiled-pcre> matches?
128     "hello\nthere" "(?s)^.*$" <compiled-pcre> matches?
129 ] unit-test
130
131 ! Modes off by default
132 { f f } [
133     ! Caseless mode
134     "x" <compiled-pcre> PCRE_CASELESS has-option?
135     ! Dotall mode
136     "." <compiled-pcre> PCRE_DOTALL has-option?
137 ] unit-test
138
139 ! Backreferences
! \1 must repeat the text that group 1 actually matched.
140 { { t f } } [
141     { "response and responsibility" "sense and responsibility" }
142     [ "(sens|respons)e and \\1ibility" matches? ] map
143 ] unit-test
144
! The (?i) inside the group is not expected to apply to the \1
! backreference, so "RAH rah" must not match.
145 { { t t f } } [
146     { "rah rah" "RAH RAH" "RAH rah" } [ "((?i)rah)\\s+\\1" matches? ] map
147 ] unit-test
148
149 ! Splitting
! Both patterns must give the same three words, with no empty pieces.
150 { { { "Words" "words" "word" } { "Words" "words" "word" } } } [
151     "Words, words, word." { "\\W+" "[,. ]" } [ split ] with map
152 ] unit-test
153
154 ! Bigger tests
! NOTE: fetches a live page with http-get, so this needs network access.
! It only checks that looking up "link" in every match yields a sequence.
155 { t } [
156     "https://factorcode.org/" http-get nip
157     "href=\"(?P<link>[^\"]+)\"" findall [ "link" of ] map sequence?
158 ] unit-test
159
160 ! Test that the regexp syntax works.
! A literal from the regexp vocabulary (R/ ... /) is passed to matches? here.
161 { t } [ "1234abcd" regexp:R/ ^\d+\w+$/ matches? ] unit-test