]> gitweb.factorcode.org Git - factor.git/blob - extra/pcre/pcre-tests.factor
factor: rename [ ] [ ] unit-test -> { } [ ] unit-test using a refactoring tool!
[factor.git] / extra / pcre / pcre-tests.factor
1 USING: accessors arrays assocs continuations http.client kernel
2 literals math.ranges pcre pcre.ffi pcre.private random sequences
3 system tools.test ;
4 QUALIFIED: regexp
5 IN: pcre.tests
6
7 { { "Bords" "words" "word" } } [
8     "Bords, words, word." { ", " ", " "." } split-subseqs
9 ] unit-test
10
11 { { { 3 "day" } { 2 "month" } { 1 "year" } } } [
12     "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
13     <compiled-pcre> nametable>>
14 ] unit-test
15
16 CONSTANT: iso-date "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
17
18 ! On Windows the erroffset appears to be set to 0 despite there being
19 ! nothing wrong with the regexp.
20 { t } [
21     "foo" (pcre) 3array rest { { f -1 } { f 0 } } member?
22 ] unit-test
23
24 { { 1 2 3 } } [
25     iso-date <pcre>
26     { "year" "month" "day" } [ pcre_get_stringnumber ] with map
27 ] unit-test
28
29 { t } [ "foo" <compiled-pcre> PCRE_UTF8 has-option? ] unit-test
30 ! Unix builds of pcre are expected to default to LF (10) as the newline.
31 os unix? [ { 10 } [ PCRE_CONFIG_NEWLINE pcre-config ] unit-test ] when
32
33 ! In this day and age, not supporting utf-8 is broken.
34 { 1 } [ PCRE_CONFIG_UTF8 pcre-config ] unit-test
35
36 { 1 } [ PCRE_CONFIG_UNICODE_PROPERTIES pcre-config ] unit-test
37
38 ! Ok if these options throw if the pcre library is too old to support
39 ! these configuration parameters.
40 { t } [
41     [ PCRE_CONFIG_UTF16 pcre-config ] [ what>> ] recover
42     { 0 $ PCRE_CONFIG_UTF16 } member?
43 ] unit-test
44 { t } [
45     [ PCRE_CONFIG_UTF32 pcre-config ] [ what>> ] recover
46     { 0 $ PCRE_CONFIG_UTF32 } member?
47 ] unit-test
48
49 { 33 }
50 [
51     [ "foo" <pcre> f 33 pcre-fullinfo ] [ what>> ] recover
52 ] unit-test
53
54 ! Tests for findall
55 {
56     { { f "1999-01-12" } { "year" "1999" } { "month" "01" } { "day" "12" } }
57 } [
58     "1999-01-12" iso-date <compiled-pcre> findall first
59 ] unit-test
60
61 { 3 } [
62     "2003-10-09 1999-09-01 1514-10-20" iso-date <compiled-pcre> findall length
63 ] unit-test
64
65 { 5 } [ "abcdef" "[a-e]" findall length ] unit-test
66
67 { 3 } [ "foo bar baz" "foo|bar|baz" findall length ] unit-test
68
69 { 3 } [ "örjan är åtta" "[åäö]" findall length ] unit-test
70
71 { 3 } [ "ÅÄÖ" "\\p{Lu}" findall length ] unit-test
72
73 { 3 } [ "foobar" "foo(?=bar)" findall first first second length ] unit-test
74
75 { { { { f ", " } } { { f ", " } } { { f "." } } } } [
76     "Words, words, word." "\\W+" findall
77 ] unit-test
78
79 { { ", " ", " "." } } [
80     "Words, words, word." "\\W+" findall [ first second ] map
81 ] unit-test
82 ! Builds a 10000-character string of random characters in the a..z range.
83 : long-string ( -- str )
84     10000 CHAR: a CHAR: z [a,b] [ random ] curry "" replicate-as ;
85
86 ! Performance
87 { 0 } [ long-string ".{0,15}foobar.{0,10}" findall length ] unit-test
88
89 ! Empty matches, corner case behaviour is copied from pcredemo.c
90 { { { { f "foo" } } { { f "" } } } }
91 [ "foo" ".*" findall ] unit-test
92
93 { { { { f "" } } { { f "" } } { { f "" } } } }
94 [ "foo" "B*" findall ] unit-test
95
96 ! Empty matches in strings with multi-byte characters are tricky.
97 { { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } }
98 [ "öööö" "x*" findall ] unit-test
99
100 ! Tests for matches?
101 { t } [ "örjan" "örjan" matches? ] unit-test
102
103 { t } [ "abcö" "\\p{Ll}{4}" matches? ] unit-test
104
105 ! Dotall mode, off by default
106 { f } [ "." <compiled-pcre> PCRE_DOTALL has-option? ] unit-test
107 { t } [ "(?s)." <compiled-pcre> PCRE_DOTALL has-option? ] unit-test
108
109 { f } [ "\n" "." matches? ] unit-test
110 { t } [ "\n" "(?s)." matches? ] unit-test
111
112 ! Caseless mode, off by default
113 { { f t } } [
114     { "x" "(?i)x" } [ <compiled-pcre> PCRE_CASELESS has-option? ] map
115 ] unit-test
116
117 ! Backreferences
118 { { t f } } [
119     { "response and responsibility" "sense and responsibility" }
120     [ "(sens|respons)e and \\1ibility" matches? ] map
121 ] unit-test
122
123 { { t t f } } [
124     { "rah rah" "RAH RAH" "RAH rah" } [ "((?i)rah)\\s+\\1" matches? ] map
125 ] unit-test
126
127 ! Splitting
128 { { { "Words" "words" "word" } { "Words" "words" "word" } } } [
129     "Words, words, word." { "\\W+" "[,. ]" } [ split ] with map
130 ] unit-test
131
132 ! Bigger tests
133 { t } [
134     "http://factorcode.org/" http-get nip
135     "href=\"(?P<link>[^\"]+)\"" findall [ "link" of ] map sequence?
136 ] unit-test
137
138 ! Test that the regexp syntax works.
139 { t } [ "1234abcd" regexp:R[ ^\d+\w+$] matches? ] unit-test