]> gitweb.factorcode.org Git - factor.git/blob - extra/pcre/pcre-tests.factor
pcre: using private to hide implementation details.
[factor.git] / extra / pcre / pcre-tests.factor
1 USING: accessors arrays assocs http.client kernel math.ranges
2 pcre pcre.ffi pcre.private random sequences system tools.test ;
3 QUALIFIED: regexp
4 IN: pcre.tests
5 ! split-subseqs (presumably from pcre.private): the separators are listed in the order they occur in the text; the pieces between them are expected back.
6 [ { "Bords" "words" "word" } ] [
7     "Bords, words, word." { ", " ", " "." } split-subseqs
8 ] unit-test
9
10 ! Date pattern with three named groups (year, month, day), shared by the tests below.
11 CONSTANT: iso-date "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
12
13 [ { { 3 "day" } { 2 "month" } { 1 "year" } } ] [
14     iso-date <compiled-pcre> nametable>>
15 ] unit-test
16
17 [ { 100 110 120 130 } ] [ 100 10 4 gen-array-addrs ] unit-test
18
19 ! On Windows the erroffset appears to be set to 0 despite there being
20 ! nothing wrong with the regexp, so both { f -1 } and { f 0 } are accepted.
21 [ t ] [
22     "foo" (pcre) 3array 1 tail { { f -1 } { f 0 } } member?
23 ] unit-test
24 ! The named groups of iso-date are expected to resolve to numbers 1, 2 and 3, in the order they appear in the pattern.
25 [ { 1 2 3 } ] [
26     iso-date <pcre>
27     { "year" "month" "day" } [ pcre_get_stringnumber ] with map
28 ] unit-test
29 ! A pattern compiled with <compiled-pcre> is expected to have the PCRE_UTF8 option set.
30 [ t ] [ "foo" <compiled-pcre> PCRE_UTF8 has-option? ] unit-test
31 ! Checked only when os is unix?: the configured newline is expected to be 10.
32 os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE config ] unit-test ] when
33
34 ! In this day and age, not supporting utf-8 is broken.
35 [ 1 ] [ PCRE_CONFIG_UTF8 config ] unit-test
36
37 [ 1 ] [ PCRE_CONFIG_UNICODE_PROPERTIES config ] unit-test
38
39 ! The libpcre in use is expected to report no UTF-16 or UTF-32 support.
40 [ 0 ] [ PCRE_CONFIG_UTF16 config ] unit-test
41 [ 0 ] [ PCRE_CONFIG_UTF32 config ] unit-test
42
43 ! Tests for findall. Each match is a sequence of { name text } pairs; the whole match is listed first under the name f.
44 [
45     { { f "1999-01-12" } { "year" "1999" } { "month" "01" } { "day" "12" } }
46 ] [
47     "1999-01-12" iso-date <compiled-pcre> findall first
48 ] unit-test
49
50 [ 3 ] [
51     "2003-10-09 1999-09-01 1514-10-20" iso-date <compiled-pcre> findall length
52 ] unit-test
53 ! findall is also called with the pattern given as a plain string.
54 [ 5 ] [ "abcdef" "[a-e]" findall length ] unit-test
55
56 [ 3 ] [ "foo bar baz" "foo|bar|baz" findall length ] unit-test
57 ! Non-ASCII characters inside a character class.
58 [ 3 ] [ "örjan är åtta" "[åäö]" findall length ] unit-test
59 ! Unicode property class for uppercase letters.
60 [ 3 ] [ "ÅÄÖ" "\\p{Lu}" findall length ] unit-test
61 ! The lookahead (?=bar) is not part of the match, so the matched text has length 3.
62 [ 3 ] [ "foobar" "foo(?=bar)" findall first first second length ] unit-test
63
64 [ { { { f ", " } } { { f ", " } } { { f "." } } } ] [
65     "Words, words, word." "\\W+" findall
66 ] unit-test
67 ! Same search, keeping only the matched text of each result.
68 [ { ", " ", " "." } ] [
69     "Words, words, word." "\\W+" findall [ first second ] map
70 ] unit-test
71 ! Builds a 10000-character string of random letters from a-z; the a-z range is built once and curried into the generator.
72 : long-string ( -- x )
73     10000 CHAR: a CHAR: z [a,b] [ random ] curry "" replicate-as ;
74 ! Performance: scans a long random string in which no match is expected.
75 ! NOTE(review): long-string is random, so "foobar" could appear by chance and fail this test (very unlikely).
76 [ 0 ] [ long-string ".{0,15}foobar.{0,10}" findall length ] unit-test
77
78 ! Empty matches, corner case behaviour is copied from pcredemo.c
79 [ { { { f "foo" } } { { f "" } } } ]
80 [ "foo" ".*" findall ] unit-test
81 ! "B*" can only match the empty string here; three empty matches are expected for the three-character input.
82 [ { { { f "" } } { { f "" } } { { f "" } } } ]
83 [ "foo" "B*" findall ] unit-test
84
85 ! Empty matches in strings with multi-byte characters are tricky: four empty matches are expected for four characters.
86 [ { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } ]
87 [ "öööö" "x*" findall ] unit-test
88
89 ! Tests for matches?, including non-ASCII input and Unicode property classes.
90 [ t ] [ "örjan" "örjan" matches? ] unit-test
91
92 [ t ] [ "abcö" "\\p{Ll}{4}" matches? ] unit-test
93
94 ! Dotall mode, off by default; the inline (?s) flag turns it on.
95 [ f ] [ "." <compiled-pcre> PCRE_DOTALL has-option? ] unit-test
96 [ t ] [ "(?s)." <compiled-pcre> PCRE_DOTALL has-option? ] unit-test
97 ! Without dotall "." is expected not to match a newline; with (?s) it is.
98 [ f ] [ "\n" "." matches? ] unit-test
99 [ t ] [ "\n" "(?s)." matches? ] unit-test
100
101 ! Caseless mode, off by default; the inline (?i) flag turns it on.
102 [ { f t } ] [
103     { "x" "(?i)x" } [ <compiled-pcre> PCRE_CASELESS has-option? ] map
104 ] unit-test
105
106 ! Backreferences: \1 has to repeat the text captured by group 1.
107 [ { t f } ] [
108     { "response and responsibility" "sense and responsibility" }
109     [ "(sens|respons)e and \\1ibility" matches? ] map
110 ] unit-test
111 ! (?i) is set only inside the group, so the capture is caseless but \1 is expected to match the captured text case-sensitively.
112 [ { t t f } ] [
113     { "rah rah" "RAH RAH" "RAH rah" } [ "((?i)rah)\\s+\\1" matches? ] map
114 ] unit-test
115
116 ! Splitting: both patterns are expected to yield the same three words.
117 [ { { "Words" "words" "word" } { "Words" "words" "word" } } ] [
118     "Words, words, word." { "\\W+" "[,. ]" } [ split ] with map
119 ] unit-test
120 ! Bigger tests: fetches http://factorcode.org/ with http-get, so network access is needed.
121 ! NOTE(review): the final sequence? only checks that map returned a sequence; the extracted links are not inspected.
122 [ t ] [
123     "http://factorcode.org/" http-get nip
124     "href=\"(?P<link>[^\"]+)\"" findall [ "link" of ] map sequence?
125 ] unit-test
126
127 ! Test that a literal written with the regexp vocabulary's R[ syntax can be passed to matches?.
128 [ t ] [ "1234abcd" regexp:R[ ^\d+\w+$] matches? ] unit-test