]> gitweb.factorcode.org Git - factor.git/commitdiff
pcre: moved from basis/ to extra/
authorBjörn Lindqvist <bjourne@gmail.com>
Wed, 6 Nov 2013 17:05:37 +0000 (18:05 +0100)
committerJohn Benediktsson <mrjbq7@gmail.com>
Wed, 20 Nov 2013 02:13:39 +0000 (18:13 -0800)
16 files changed:
basis/pcre/ffi/ffi-tests.factor [deleted file]
basis/pcre/ffi/ffi.factor [deleted file]
basis/pcre/info/info-tests.factor [deleted file]
basis/pcre/info/info.factor [deleted file]
basis/pcre/pcre-tests.factor [deleted file]
basis/pcre/pcre.factor [deleted file]
basis/pcre/utils/utils-tests.factor [deleted file]
basis/pcre/utils/utils.factor [deleted file]
extra/pcre/ffi/ffi-tests.factor [new file with mode: 0644]
extra/pcre/ffi/ffi.factor [new file with mode: 0644]
extra/pcre/info/info-tests.factor [new file with mode: 0644]
extra/pcre/info/info.factor [new file with mode: 0644]
extra/pcre/pcre-tests.factor [new file with mode: 0644]
extra/pcre/pcre.factor [new file with mode: 0644]
extra/pcre/utils/utils-tests.factor [new file with mode: 0644]
extra/pcre/utils/utils.factor [new file with mode: 0644]

diff --git a/basis/pcre/ffi/ffi-tests.factor b/basis/pcre/ffi/ffi-tests.factor
deleted file mode 100644 (file)
index 491ae1e..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-USING: pcre.ffi sequences splitting tools.test ;
-IN: pcre.ffi.tests
-
-[ 2 ] [ pcre_version " " split length ] unit-test
diff --git a/basis/pcre/ffi/ffi.factor b/basis/pcre/ffi/ffi.factor
deleted file mode 100644 (file)
index 6a31888..0000000
+++ /dev/null
@@ -1,188 +0,0 @@
-USING:
-    alien alien.c-types alien.data alien.libraries alien.syntax
-    classes.struct
-    combinators
-    system ;
-IN: pcre.ffi
-
-! http://sourceforge.net/projects/gnuwin32/files/pcre/7.0/pcre-7.0-bin.zip/download
-
-<< "pcre" {
-    { [ os unix? ] [ "libpcre.so" ] }
-    { [ os windows? ] [ "pcre3.dll" ] }
-} cond cdecl add-library >>
-
-LIBRARY: pcre
-
-CONSTANT: PCRE_CASELESS           0x00000001
-CONSTANT: PCRE_MULTILINE          0x00000002
-CONSTANT: PCRE_DOTALL             0x00000004
-CONSTANT: PCRE_EXTENDED           0x00000008
-CONSTANT: PCRE_ANCHORED           0x00000010
-CONSTANT: PCRE_DOLLAR_ENDONLY     0x00000020
-CONSTANT: PCRE_EXTRA              0x00000040
-CONSTANT: PCRE_NOTBOL             0x00000080
-CONSTANT: PCRE_NOTEOL             0x00000100
-CONSTANT: PCRE_UNGREEDY           0x00000200
-CONSTANT: PCRE_NOTEMPTY           0x00000400
-CONSTANT: PCRE_UTF8               0x00000800
-CONSTANT: PCRE_NO_AUTO_CAPTURE    0x00001000
-CONSTANT: PCRE_NO_UTF8_CHECK      0x00002000
-CONSTANT: PCRE_AUTO_CALLOUT       0x00004000
-CONSTANT: PCRE_PARTIAL_SOFT       0x00008000
-CONSTANT: PCRE_PARTIAL            0x00008000
-CONSTANT: PCRE_DFA_SHORTEST       0x00010000
-CONSTANT: PCRE_DFA_RESTART        0x00020000
-CONSTANT: PCRE_FIRSTLINE          0x00040000
-CONSTANT: PCRE_DUPNAMES           0x00080000
-CONSTANT: PCRE_NEWLINE_CR         0x00100000
-CONSTANT: PCRE_NEWLINE_LF         0x00200000
-CONSTANT: PCRE_NEWLINE_CRLF       0x00300000
-CONSTANT: PCRE_NEWLINE_ANY        0x00400000
-CONSTANT: PCRE_NEWLINE_ANYCRLF    0x00500000
-CONSTANT: PCRE_BSR_ANYCRLF        0x00800000
-CONSTANT: PCRE_BSR_UNICODE        0x01000000
-CONSTANT: PCRE_JAVASCRIPT_COMPAT  0x02000000
-CONSTANT: PCRE_NO_START_OPTIMIZE  0x04000000
-CONSTANT: PCRE_NO_START_OPTIMISE  0x04000000
-CONSTANT: PCRE_PARTIAL_HARD       0x08000000
-CONSTANT: PCRE_NOTEMPTY_ATSTART   0x10000000
-CONSTANT: PCRE_UCP                0x20000000
-
-ENUM: PCRE_ERRORS
-    { PCRE_ERROR_NOMATCH         -1 }
-    { PCRE_ERROR_NULL            -2 }
-    { PCRE_ERROR_BADOPTION       -3 }
-    { PCRE_ERROR_BADMAGIC        -4 }
-    { PCRE_ERROR_UNKNOWN_OPCODE  -5 }
-    { PCRE_ERROR_UNKNOWN_NODE    -5 }
-    { PCRE_ERROR_NOMEMORY        -6 }
-    { PCRE_ERROR_NOSUBSTRING     -7 }
-    { PCRE_ERROR_MATCHLIMIT      -8 }
-    { PCRE_ERROR_CALLOUT         -9 }
-    { PCRE_ERROR_BADUTF8        -10 }
-    { PCRE_ERROR_BADUTF8_OFFSET -11 }
-    { PCRE_ERROR_PARTIAL        -12 }
-    { PCRE_ERROR_BADPARTIAL     -13 }
-    { PCRE_ERROR_INTERNAL       -14 }
-    { PCRE_ERROR_BADCOUNT       -15 }
-    { PCRE_ERROR_DFA_UITEM      -16 }
-    { PCRE_ERROR_DFA_UCOND      -17 }
-    { PCRE_ERROR_DFA_UMLIMIT    -18 }
-    { PCRE_ERROR_DFA_WSSIZE     -19 }
-    { PCRE_ERROR_DFA_RECURSE    -20 }
-    { PCRE_ERROR_RECURSIONLIMIT -21 }
-    { PCRE_ERROR_NULLWSLIMIT    -22 }
-    { PCRE_ERROR_BADNEWLINE     -23 }
-    { PCRE_ERROR_BADOFFSET      -24 }
-    { PCRE_ERROR_SHORTUTF8      -25 } ;
-
-CONSTANT: PCRE_ERROR_NOMATCH         -1
-CONSTANT: PCRE_ERROR_NULL            -2
-CONSTANT: PCRE_ERROR_BADOPTION       -3
-CONSTANT: PCRE_ERROR_BADMAGIC        -4
-CONSTANT: PCRE_ERROR_UNKNOWN_OPCODE  -5
-CONSTANT: PCRE_ERROR_UNKNOWN_NODE    -5
-CONSTANT: PCRE_ERROR_NOMEMORY        -6
-CONSTANT: PCRE_ERROR_NOSUBSTRING     -7
-CONSTANT: PCRE_ERROR_MATCHLIMIT      -8
-CONSTANT: PCRE_ERROR_CALLOUT         -9
-CONSTANT: PCRE_ERROR_BADUTF8        -10
-CONSTANT: PCRE_ERROR_BADUTF8_OFFSET -11
-CONSTANT: PCRE_ERROR_PARTIAL        -12
-CONSTANT: PCRE_ERROR_BADPARTIAL     -13
-CONSTANT: PCRE_ERROR_INTERNAL       -14
-CONSTANT: PCRE_ERROR_BADCOUNT       -15
-CONSTANT: PCRE_ERROR_DFA_UITEM      -16
-CONSTANT: PCRE_ERROR_DFA_UCOND      -17
-CONSTANT: PCRE_ERROR_DFA_UMLIMIT    -18
-CONSTANT: PCRE_ERROR_DFA_WSSIZE     -19
-CONSTANT: PCRE_ERROR_DFA_RECURSE    -20
-CONSTANT: PCRE_ERROR_RECURSIONLIMIT -21
-CONSTANT: PCRE_ERROR_NULLWSLIMIT    -22
-CONSTANT: PCRE_ERROR_BADNEWLINE     -23
-CONSTANT: PCRE_ERROR_BADOFFSET      -24
-CONSTANT: PCRE_ERROR_SHORTUTF8      -25
-
-CONSTANT: PCRE_INFO_OPTIONS            0
-CONSTANT: PCRE_INFO_SIZE               1
-CONSTANT: PCRE_INFO_CAPTURECOUNT       2
-CONSTANT: PCRE_INFO_BACKREFMAX         3
-CONSTANT: PCRE_INFO_FIRSTBYTE          4
-CONSTANT: PCRE_INFO_FIRSTCHAR          4
-CONSTANT: PCRE_INFO_FIRSTTABLE         5
-CONSTANT: PCRE_INFO_LASTLITERAL        6
-CONSTANT: PCRE_INFO_NAMEENTRYSIZE      7
-CONSTANT: PCRE_INFO_NAMECOUNT          8
-CONSTANT: PCRE_INFO_NAMETABLE          9
-CONSTANT: PCRE_INFO_STUDYSIZE         10
-CONSTANT: PCRE_INFO_DEFAULT_TABLES    11
-CONSTANT: PCRE_INFO_OKPARTIAL         12
-CONSTANT: PCRE_INFO_JCHANGED          13
-CONSTANT: PCRE_INFO_HASCRORLF         14
-CONSTANT: PCRE_INFO_MINLENGTH         15
-
-CONSTANT: PCRE_CONFIG_UTF8                    0
-CONSTANT: PCRE_CONFIG_NEWLINE                 1
-CONSTANT: PCRE_CONFIG_LINK_SIZE               2
-CONSTANT: PCRE_CONFIG_POSIX_MALLOC_THRESHOLD  3
-CONSTANT: PCRE_CONFIG_MATCH_LIMIT             4
-CONSTANT: PCRE_CONFIG_STACKRECURSE            5
-CONSTANT: PCRE_CONFIG_UNICODE_PROPERTIES      6
-CONSTANT: PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
-CONSTANT: PCRE_CONFIG_BSR                     8
-
-
-STRUCT: pcre_extra
-    { flags int }
-    { study_data void* }
-    { match_limit long }
-    { callout_data void* }
-    { tables uchar* }
-    { match_limit_recursion int }
-    { mark uchar** } ;
-
-FUNCTION: void pcre_config ( int what, void* where ) ;
-
-FUNCTION: void* pcre_compile ( c-string pattern,
-                               int options,
-                               char** errptr,
-                               int* erroffset,
-                               char* tableptr ) ;
-
-FUNCTION: void* pcre_compile2 ( c-string pattern,
-                                int options,
-                                int* errcodeptr,
-                                char** errptr,
-                                int* erroffset,
-                                char* tableptr ) ;
-
-FUNCTION: int pcre_info ( void* pcre, int* optptr, int* first_byte ) ;
-FUNCTION: int pcre_fullinfo ( void* pcre, pcre_extra* extra, int what, void *where ) ;
-
-FUNCTION: pcre_extra* pcre_study ( void* pcre, int options, char** errptr ) ;
-FUNCTION: int pcre_exec ( void* pcre,
-                          pcre_extra* extra,
-                          c-string subject,
-                          int length,
-                          int startoffset,
-                          int options,
-                          int* ovector,
-                          int ovecsize ) ;
-
-FUNCTION: int pcre_get_stringnumber ( void* pcre, c-string name ) ;
-
-FUNCTION: int pcre_get_substring ( c-string subject,
-                                   int* ovector,
-                                   int stringcount,
-                                   int stringnumber,
-                                   void *stringptr ) ;
-
-FUNCTION: int pcre_get_substring_list ( c-string subject,
-                                        int* ovector,
-                                        int stringcount,
-                                        void *stringptr ) ;
-
-FUNCTION: c-string pcre_version ( ) ;
-
-FUNCTION: uchar* pcre_maketables ( ) ;
diff --git a/basis/pcre/info/info-tests.factor b/basis/pcre/info/info-tests.factor
deleted file mode 100644 (file)
index be689fd..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-USING: accessors pcre pcre.info pcre.utils sequences tools.test ;
-
-[ { { 3 "day" } { 2 "month" } { 1 "year" } } ]
-[
-    "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})" <compiled-pcre>
-    nametable>>
-] unit-test
-
-[ { 100 110 120 130 } ] [ 100 10 4 gen-array-addrs ] unit-test
diff --git a/basis/pcre/info/info.factor b/basis/pcre/info/info.factor
deleted file mode 100644 (file)
index 29a4c40..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-USING:
-    accessors
-    alien alien.accessors alien.c-types alien.data alien.strings
-    arrays
-    io.encodings.utf8
-    kernel
-    math
-    pcre.ffi pcre.utils
-    sequences ;
-IN: pcre.info
-
-! Mostly internal
-: fullinfo ( pcre extra what -- obj )
-    { int } [ pcre_fullinfo ] with-out-parameters nip ;
-
-: name-count ( pcre extra -- n )
-    PCRE_INFO_NAMECOUNT fullinfo ;
-
-: name-table ( pcre extra -- addr )
-    PCRE_INFO_NAMETABLE fullinfo ;
-
-: name-entry-size ( pcre extra -- size )
-    PCRE_INFO_NAMEENTRYSIZE fullinfo ;
-
-: name-table-entry ( addr -- group-index group-name )
-    [ <alien> 1 alien-unsigned-1 ] [ 2 + <alien> utf8 alien>string ] bi ;
-
-: options ( pcre -- opts )
-    f PCRE_INFO_OPTIONS fullinfo ;
-
-! Exported
-: name-table-entries ( pcre extra -- addrs )
-    [ name-table ] [ name-entry-size ] [ name-count ] 2tri gen-array-addrs
-    [ name-table-entry 2array ] map ;
diff --git a/basis/pcre/pcre-tests.factor b/basis/pcre/pcre-tests.factor
deleted file mode 100644 (file)
index 3963c5b..0000000
+++ /dev/null
@@ -1,123 +0,0 @@
-USING:
-    accessors
-    arrays
-    assocs
-    http.client
-    kernel
-    math math.ranges
-    pcre pcre.ffi pcre.info
-    random
-    sequences
-    system
-    tools.test ;
-QUALIFIED: regexp
-IN: pcre.tests
-
-CONSTANT: iso-date "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
-
-! On windows the erroffset appears to be set to 0 despite there being
-! nothing wrong with the regexp.
-[ t ] [
-    "foo" (pcre) 3array 1 tail { { f -1 } { f 0 } } member?
-] unit-test
-
-[ { 1 2 3 } ] [
-    iso-date <pcre>
-    { "year" "month" "day" } [ pcre_get_stringnumber ] with map
-] unit-test
-
-[ t ] [ "foo" <compiled-pcre> PCRE_UTF8 has-option? ] unit-test
-
-os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE config ] unit-test ] when
-
-! In this day and age, not supporting utf-8 is broken.
-[ 1 ] [ PCRE_CONFIG_UTF8 config ] unit-test
-
-[ 1 ] [ PCRE_CONFIG_UNICODE_PROPERTIES config ] unit-test
-
-! Tests for findall
-[
-    { { f "1999-01-12" } { "year" "1999" } { "month" "01" } { "day" "12" } }
-] [
-    "1999-01-12" iso-date <compiled-pcre> findall first
-] unit-test
-
-[ 3 ] [
-    "2003-10-09 1999-09-01 1514-10-20" iso-date <compiled-pcre> findall length
-] unit-test
-
-[ 5 ] [ "abcdef" "[a-e]" findall length ] unit-test
-
-[ 3 ] [ "foo bar baz" "foo|bar|baz" findall length ] unit-test
-
-[ 3 ] [ "örjan är åtta" "[åäö]" findall length ] unit-test
-
-[ 3 ] [ "ÅÄÖ" "\\p{Lu}" findall length ] unit-test
-
-[ 3 ] [ "foobar" "foo(?=bar)" findall first first second length ] unit-test
-
-[ { { { f ", " } } { { f ", " } } { { f "." } } } ] [
-    "Words, words, word." "\\W+" findall
-] unit-test
-
-[ { ", " ", " "." } ] [
-    "Words, words, word." "\\W+" findall [ first second ] map
-] unit-test
-
-: long-string ( -- x )
-    10000 [ CHAR: a CHAR: z [a,b] random ] "" replicate-as ;
-
-! Performance
-[ 0 ] [ long-string ".{0,15}foobar.{0,10}" findall length ] unit-test
-
-! Empty matches, corner case behaviour is copied from pcredemo.c
-[ { { { f "foo" } } { { f "" } } } ]
-[ "foo" ".*" findall ] unit-test
-
-[ { { { f "" } } { { f "" } } { { f "" } } } ]
-[ "foo" "B*" findall ] unit-test
-
-! Empty matches in strings with multi-byte characters are tricky.
-[ { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } ]
-[ "öööö" "x*" findall ] unit-test
-
-! Tests for matches?
-[ t ] [ "örjan" "örjan" matches? ] unit-test
-
-[ t ] [ "abcö" "\\p{Ll}{4}" matches? ] unit-test
-
-! Dotall mode, off by default
-[ f ] [ "." <compiled-pcre> PCRE_DOTALL has-option? ] unit-test
-[ t ] [ "(?s)." <compiled-pcre> PCRE_DOTALL has-option? ] unit-test
-
-[ f ] [ "\n" "." matches? ] unit-test
-[ t ] [ "\n" "(?s)." matches? ] unit-test
-
-! Caseless mode, off by default
-[ { f t } ] [
-    { "x" "(?i)x" } [ <compiled-pcre> PCRE_CASELESS has-option? ] map
-] unit-test
-
-! Backreferences
-[ { t f } ] [
-    { "response and responsibility" "sense and responsibility" }
-    [ "(sens|respons)e and \\1ibility" matches? ] map
-] unit-test
-
-[ { t t f } ] [
-    { "rah rah" "RAH RAH" "RAH rah" } [ "((?i)rah)\\s+\\1" matches? ] map
-] unit-test
-
-! Splitting
-[ { { "Words" "words" "word" } { "Words" "words" "word" } } ] [
-    "Words, words, word." { "\\W+" "[,. ]" } [ split ] with map
-] unit-test
-
-! Bigger tests
-[ t ] [
-    "http://factorcode.org/" http-get nip
-    "href=\"(?P<link>[^\"]+)\"" findall [ "link" of ] map sequence?
-] unit-test
-
-! Test that the regexp syntax works.
-[ t ] [ "1234abcd" regexp:R[ ^\d+\w+$] matches? ] unit-test
diff --git a/basis/pcre/pcre.factor b/basis/pcre/pcre.factor
deleted file mode 100644 (file)
index 008a04a..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-USING:
-    accessors
-    alien.c-types alien.data alien.enums alien.strings
-    arrays
-    assocs
-    io.encodings.utf8 io.encodings.string
-    kernel
-    math
-    mirrors
-    pcre.ffi pcre.info pcre.utils
-    sequences sequences.generalizations
-    strings ;
-QUALIFIED: regexp
-IN: pcre
-
-ERROR: malformed-regexp expr error ;
-ERROR: pcre-error value ;
-
-TUPLE: compiled-pcre pcre extra nametable ;
-
-: default-opts ( -- opts )
-    PCRE_UTF8 PCRE_UCP bitor ;
-
-: (pcre) ( expr -- pcre err-message err-offset )
-    default-opts { c-string int } [ f pcre_compile ] with-out-parameters ;
-
-: <pcre> ( expr -- pcre )
-    dup (pcre) 2array swap [ 2nip ] [ malformed-regexp ] if* ;
-
-: exec ( pcre extra subject ofs opts -- count match-data )
-    [ dup length ] 2dip 30 int <c-array> 30 [ pcre_exec ] 2keep drop ;
-
-: <pcre-extra> ( pcre -- pcre-extra )
-    0 { c-string } [ pcre_study ] with-out-parameters drop ;
-
-: config ( what -- alien )
-    { int } [ pcre_config ] with-out-parameters ;
-
-! Finding stuff
-TUPLE: matcher pcre extra subject ofs exec-opts match ;
-
-: <matcher> ( subject compiled-pcre -- matcher )
-    [ utf8 encode ] dip [ pcre>> ] [ extra>> ] bi rot 0 0 f matcher boa ;
-
-! This handling of zero-length matches is taken from pcredemo.c
-: empty-match-opts ( -- opts )
-    PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED bitor ;
-
-: findnext ( matcher -- matcher'/f )
-    clone dup <mirror> values 6 firstn drop exec
-    over dup -1 < [ PCRE_ERRORS number>enum pcre-error ] when
-    -1 =
-    [
-        2drop dup exec-opts>> 0 =
-        [ drop f ]
-        [
-            dup [ subject>> ] [ ofs>> ] bi next-utf8-char
-            [ >>ofs 0 >>exec-opts findnext ] [ drop f ] if*
-        ] if
-    ]
-    [
-        [ 2array >>match ]
-        [
-            nip
-            [ first2 = [ empty-match-opts ] [ 0 ] if >>exec-opts ]
-            [ second >>ofs ] bi
-        ] 2bi
-    ] if ;
-
-! Result parsing
-: substring-list ( subject match-array count -- alien )
-    { void* } [ pcre_get_substring_list drop ] with-out-parameters ;
-
-: parse-match ( subject nametable match-data -- match )
-    swapd first2 swap [ substring-list ] keep void* <c-direct-array>
-    [ utf8 alien>string ] { } map-as [ of swap 2array ] with map-index ;
-
-! High-level
-: <compiled-pcre> ( expr -- compiled-pcre )
-    <pcre> dup <pcre-extra> 2dup name-table-entries compiled-pcre boa ;
-
-: has-option? ( compiled-pcre option -- ? )
-    [ pcre>> options ] dip bitand 0 > ;
-
-GENERIC: findall ( subject obj -- matches )
-
-M: compiled-pcre findall
-    [ <matcher> [ findnext ] follow [ match>> ] map harvest ]
-    [ nametable>> rot [ parse-match ] 2with map ] 2bi >array ;
-
-M: string findall
-    <compiled-pcre> findall ;
-
-M: regexp:regexp findall
-    raw>> findall ;
-
-: matches? ( subject obj -- ? )
-    dupd findall [ nip length 1 = ] [ ?first ?first ?last = ] 2bi and ;
-
-: split ( subject obj -- strings )
-    dupd findall [ first second ] map split-subseqs ;
diff --git a/basis/pcre/utils/utils-tests.factor b/basis/pcre/utils/utils-tests.factor
deleted file mode 100644 (file)
index f53b823..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-USING: pcre.utils tools.test ;
-IN: pcre.utils.tests
-
-[ { "Bords" "words" "word" } ] [
-    "Bords, words, word." { ", " ", " "." } split-subseqs
-] unit-test
diff --git a/basis/pcre/utils/utils.factor b/basis/pcre/utils/utils.factor
deleted file mode 100644 (file)
index f5fea26..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-USING: assocs fry kernel math mirrors sequences splitting strings ;
-IN: pcre.utils
-
-: replace-all ( seq subseqs new -- seq )
-    swapd '[ _ replace ] reduce ;
-
-: split-subseqs ( seq subseqs -- seqs )
-    dup first [ replace-all ] keep split-subseq [ >string ] map harvest ;
-
-: 2with ( param1 param2 obj quot -- obj curry )
-    [ -rot ] dip [ [ rot ] dip call ] 3curry ; inline
-
-: gen-array-addrs ( base size n -- addrs )
-    iota [ * + ] 2with map ;
-
-: utf8-start-byte? ( byte -- ? )
-    0xc0 bitand 0x80 = not ;
-
-: next-utf8-char ( byte-array pos -- pos' )
-    1 + 2dup swap ?nth
-    [ utf8-start-byte? [ nip ] [ next-utf8-char ] if ] [ 2drop f ] if* ;
diff --git a/extra/pcre/ffi/ffi-tests.factor b/extra/pcre/ffi/ffi-tests.factor
new file mode 100644 (file)
index 0000000..491ae1e
--- /dev/null
@@ -0,0 +1,4 @@
+USING: pcre.ffi sequences splitting tools.test ;
+IN: pcre.ffi.tests
+
+[ 2 ] [ pcre_version " " split length ] unit-test
diff --git a/extra/pcre/ffi/ffi.factor b/extra/pcre/ffi/ffi.factor
new file mode 100644 (file)
index 0000000..6a31888
--- /dev/null
@@ -0,0 +1,188 @@
+USING:
+    alien alien.c-types alien.data alien.libraries alien.syntax
+    classes.struct
+    combinators
+    system ;
+IN: pcre.ffi
+
+! http://sourceforge.net/projects/gnuwin32/files/pcre/7.0/pcre-7.0-bin.zip/download
+
+<< "pcre" {
+    { [ os unix? ] [ "libpcre.so" ] }
+    { [ os windows? ] [ "pcre3.dll" ] }
+} cond cdecl add-library >>
+
+LIBRARY: pcre
+
+CONSTANT: PCRE_CASELESS           0x00000001
+CONSTANT: PCRE_MULTILINE          0x00000002
+CONSTANT: PCRE_DOTALL             0x00000004
+CONSTANT: PCRE_EXTENDED           0x00000008
+CONSTANT: PCRE_ANCHORED           0x00000010
+CONSTANT: PCRE_DOLLAR_ENDONLY     0x00000020
+CONSTANT: PCRE_EXTRA              0x00000040
+CONSTANT: PCRE_NOTBOL             0x00000080
+CONSTANT: PCRE_NOTEOL             0x00000100
+CONSTANT: PCRE_UNGREEDY           0x00000200
+CONSTANT: PCRE_NOTEMPTY           0x00000400
+CONSTANT: PCRE_UTF8               0x00000800
+CONSTANT: PCRE_NO_AUTO_CAPTURE    0x00001000
+CONSTANT: PCRE_NO_UTF8_CHECK      0x00002000
+CONSTANT: PCRE_AUTO_CALLOUT       0x00004000
+CONSTANT: PCRE_PARTIAL_SOFT       0x00008000
+CONSTANT: PCRE_PARTIAL            0x00008000
+CONSTANT: PCRE_DFA_SHORTEST       0x00010000
+CONSTANT: PCRE_DFA_RESTART        0x00020000
+CONSTANT: PCRE_FIRSTLINE          0x00040000
+CONSTANT: PCRE_DUPNAMES           0x00080000
+CONSTANT: PCRE_NEWLINE_CR         0x00100000
+CONSTANT: PCRE_NEWLINE_LF         0x00200000
+CONSTANT: PCRE_NEWLINE_CRLF       0x00300000
+CONSTANT: PCRE_NEWLINE_ANY        0x00400000
+CONSTANT: PCRE_NEWLINE_ANYCRLF    0x00500000
+CONSTANT: PCRE_BSR_ANYCRLF        0x00800000
+CONSTANT: PCRE_BSR_UNICODE        0x01000000
+CONSTANT: PCRE_JAVASCRIPT_COMPAT  0x02000000
+CONSTANT: PCRE_NO_START_OPTIMIZE  0x04000000
+CONSTANT: PCRE_NO_START_OPTIMISE  0x04000000
+CONSTANT: PCRE_PARTIAL_HARD       0x08000000
+CONSTANT: PCRE_NOTEMPTY_ATSTART   0x10000000
+CONSTANT: PCRE_UCP                0x20000000
+
+ENUM: PCRE_ERRORS
+    { PCRE_ERROR_NOMATCH         -1 }
+    { PCRE_ERROR_NULL            -2 }
+    { PCRE_ERROR_BADOPTION       -3 }
+    { PCRE_ERROR_BADMAGIC        -4 }
+    { PCRE_ERROR_UNKNOWN_OPCODE  -5 }
+    { PCRE_ERROR_UNKNOWN_NODE    -5 }
+    { PCRE_ERROR_NOMEMORY        -6 }
+    { PCRE_ERROR_NOSUBSTRING     -7 }
+    { PCRE_ERROR_MATCHLIMIT      -8 }
+    { PCRE_ERROR_CALLOUT         -9 }
+    { PCRE_ERROR_BADUTF8        -10 }
+    { PCRE_ERROR_BADUTF8_OFFSET -11 }
+    { PCRE_ERROR_PARTIAL        -12 }
+    { PCRE_ERROR_BADPARTIAL     -13 }
+    { PCRE_ERROR_INTERNAL       -14 }
+    { PCRE_ERROR_BADCOUNT       -15 }
+    { PCRE_ERROR_DFA_UITEM      -16 }
+    { PCRE_ERROR_DFA_UCOND      -17 }
+    { PCRE_ERROR_DFA_UMLIMIT    -18 }
+    { PCRE_ERROR_DFA_WSSIZE     -19 }
+    { PCRE_ERROR_DFA_RECURSE    -20 }
+    { PCRE_ERROR_RECURSIONLIMIT -21 }
+    { PCRE_ERROR_NULLWSLIMIT    -22 }
+    { PCRE_ERROR_BADNEWLINE     -23 }
+    { PCRE_ERROR_BADOFFSET      -24 }
+    { PCRE_ERROR_SHORTUTF8      -25 } ;
+
+CONSTANT: PCRE_ERROR_NOMATCH         -1
+CONSTANT: PCRE_ERROR_NULL            -2
+CONSTANT: PCRE_ERROR_BADOPTION       -3
+CONSTANT: PCRE_ERROR_BADMAGIC        -4
+CONSTANT: PCRE_ERROR_UNKNOWN_OPCODE  -5
+CONSTANT: PCRE_ERROR_UNKNOWN_NODE    -5
+CONSTANT: PCRE_ERROR_NOMEMORY        -6
+CONSTANT: PCRE_ERROR_NOSUBSTRING     -7
+CONSTANT: PCRE_ERROR_MATCHLIMIT      -8
+CONSTANT: PCRE_ERROR_CALLOUT         -9
+CONSTANT: PCRE_ERROR_BADUTF8        -10
+CONSTANT: PCRE_ERROR_BADUTF8_OFFSET -11
+CONSTANT: PCRE_ERROR_PARTIAL        -12
+CONSTANT: PCRE_ERROR_BADPARTIAL     -13
+CONSTANT: PCRE_ERROR_INTERNAL       -14
+CONSTANT: PCRE_ERROR_BADCOUNT       -15
+CONSTANT: PCRE_ERROR_DFA_UITEM      -16
+CONSTANT: PCRE_ERROR_DFA_UCOND      -17
+CONSTANT: PCRE_ERROR_DFA_UMLIMIT    -18
+CONSTANT: PCRE_ERROR_DFA_WSSIZE     -19
+CONSTANT: PCRE_ERROR_DFA_RECURSE    -20
+CONSTANT: PCRE_ERROR_RECURSIONLIMIT -21
+CONSTANT: PCRE_ERROR_NULLWSLIMIT    -22
+CONSTANT: PCRE_ERROR_BADNEWLINE     -23
+CONSTANT: PCRE_ERROR_BADOFFSET      -24
+CONSTANT: PCRE_ERROR_SHORTUTF8      -25
+
+CONSTANT: PCRE_INFO_OPTIONS            0
+CONSTANT: PCRE_INFO_SIZE               1
+CONSTANT: PCRE_INFO_CAPTURECOUNT       2
+CONSTANT: PCRE_INFO_BACKREFMAX         3
+CONSTANT: PCRE_INFO_FIRSTBYTE          4
+CONSTANT: PCRE_INFO_FIRSTCHAR          4
+CONSTANT: PCRE_INFO_FIRSTTABLE         5
+CONSTANT: PCRE_INFO_LASTLITERAL        6
+CONSTANT: PCRE_INFO_NAMEENTRYSIZE      7
+CONSTANT: PCRE_INFO_NAMECOUNT          8
+CONSTANT: PCRE_INFO_NAMETABLE          9
+CONSTANT: PCRE_INFO_STUDYSIZE         10
+CONSTANT: PCRE_INFO_DEFAULT_TABLES    11
+CONSTANT: PCRE_INFO_OKPARTIAL         12
+CONSTANT: PCRE_INFO_JCHANGED          13
+CONSTANT: PCRE_INFO_HASCRORLF         14
+CONSTANT: PCRE_INFO_MINLENGTH         15
+
+CONSTANT: PCRE_CONFIG_UTF8                    0
+CONSTANT: PCRE_CONFIG_NEWLINE                 1
+CONSTANT: PCRE_CONFIG_LINK_SIZE               2
+CONSTANT: PCRE_CONFIG_POSIX_MALLOC_THRESHOLD  3
+CONSTANT: PCRE_CONFIG_MATCH_LIMIT             4
+CONSTANT: PCRE_CONFIG_STACKRECURSE            5
+CONSTANT: PCRE_CONFIG_UNICODE_PROPERTIES      6
+CONSTANT: PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
+CONSTANT: PCRE_CONFIG_BSR                     8
+
+
+STRUCT: pcre_extra
+    { flags int }
+    { study_data void* }
+    { match_limit long }
+    { callout_data void* }
+    { tables uchar* }
+    { match_limit_recursion int }
+    { mark uchar** } ;
+
+FUNCTION: void pcre_config ( int what, void* where ) ;
+
+FUNCTION: void* pcre_compile ( c-string pattern,
+                               int options,
+                               char** errptr,
+                               int* erroffset,
+                               char* tableptr ) ;
+
+FUNCTION: void* pcre_compile2 ( c-string pattern,
+                                int options,
+                                int* errcodeptr,
+                                char** errptr,
+                                int* erroffset,
+                                char* tableptr ) ;
+
+FUNCTION: int pcre_info ( void* pcre, int* optptr, int* first_byte ) ;
+FUNCTION: int pcre_fullinfo ( void* pcre, pcre_extra* extra, int what, void *where ) ;
+
+FUNCTION: pcre_extra* pcre_study ( void* pcre, int options, char** errptr ) ;
+FUNCTION: int pcre_exec ( void* pcre,
+                          pcre_extra* extra,
+                          c-string subject,
+                          int length,
+                          int startoffset,
+                          int options,
+                          int* ovector,
+                          int ovecsize ) ;
+
+FUNCTION: int pcre_get_stringnumber ( void* pcre, c-string name ) ;
+
+FUNCTION: int pcre_get_substring ( c-string subject,
+                                   int* ovector,
+                                   int stringcount,
+                                   int stringnumber,
+                                   void *stringptr ) ;
+
+FUNCTION: int pcre_get_substring_list ( c-string subject,
+                                        int* ovector,
+                                        int stringcount,
+                                        void *stringptr ) ;
+
+FUNCTION: c-string pcre_version ( ) ;
+
+FUNCTION: uchar* pcre_maketables ( ) ;
diff --git a/extra/pcre/info/info-tests.factor b/extra/pcre/info/info-tests.factor
new file mode 100644 (file)
index 0000000..be689fd
--- /dev/null
@@ -0,0 +1,9 @@
+USING: accessors pcre pcre.info pcre.utils sequences tools.test ;
+
+[ { { 3 "day" } { 2 "month" } { 1 "year" } } ]
+[
+    "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})" <compiled-pcre>
+    nametable>>
+] unit-test
+
+[ { 100 110 120 130 } ] [ 100 10 4 gen-array-addrs ] unit-test
diff --git a/extra/pcre/info/info.factor b/extra/pcre/info/info.factor
new file mode 100644 (file)
index 0000000..29a4c40
--- /dev/null
@@ -0,0 +1,34 @@
+USING:
+    accessors
+    alien alien.accessors alien.c-types alien.data alien.strings
+    arrays
+    io.encodings.utf8
+    kernel
+    math
+    pcre.ffi pcre.utils
+    sequences ;
+IN: pcre.info
+
+! Mostly internal
+: fullinfo ( pcre extra what -- obj )
+    { int } [ pcre_fullinfo ] with-out-parameters nip ;
+
+: name-count ( pcre extra -- n )
+    PCRE_INFO_NAMECOUNT fullinfo ;
+
+: name-table ( pcre extra -- addr )
+    PCRE_INFO_NAMETABLE fullinfo ;
+
+: name-entry-size ( pcre extra -- size )
+    PCRE_INFO_NAMEENTRYSIZE fullinfo ;
+
+: name-table-entry ( addr -- group-index group-name )
+    [ <alien> 1 alien-unsigned-1 ] [ 2 + <alien> utf8 alien>string ] bi ;
+
+: options ( pcre -- opts )
+    f PCRE_INFO_OPTIONS fullinfo ;
+
+! Exported
+: name-table-entries ( pcre extra -- addrs )
+    [ name-table ] [ name-entry-size ] [ name-count ] 2tri gen-array-addrs
+    [ name-table-entry 2array ] map ;
diff --git a/extra/pcre/pcre-tests.factor b/extra/pcre/pcre-tests.factor
new file mode 100644 (file)
index 0000000..3963c5b
--- /dev/null
@@ -0,0 +1,123 @@
+USING:
+    accessors
+    arrays
+    assocs
+    http.client
+    kernel
+    math math.ranges
+    pcre pcre.ffi pcre.info
+    random
+    sequences
+    system
+    tools.test ;
+QUALIFIED: regexp
+IN: pcre.tests
+
+CONSTANT: iso-date "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
+
+! On windows the erroffset appears to be set to 0 despite there being
+! nothing wrong with the regexp.
+[ t ] [
+    "foo" (pcre) 3array 1 tail { { f -1 } { f 0 } } member?
+] unit-test
+
+[ { 1 2 3 } ] [
+    iso-date <pcre>
+    { "year" "month" "day" } [ pcre_get_stringnumber ] with map
+] unit-test
+
+[ t ] [ "foo" <compiled-pcre> PCRE_UTF8 has-option? ] unit-test
+
+os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE config ] unit-test ] when
+
+! In this day and age, not supporting utf-8 is broken.
+[ 1 ] [ PCRE_CONFIG_UTF8 config ] unit-test
+
+[ 1 ] [ PCRE_CONFIG_UNICODE_PROPERTIES config ] unit-test
+
+! Tests for findall
+[
+    { { f "1999-01-12" } { "year" "1999" } { "month" "01" } { "day" "12" } }
+] [
+    "1999-01-12" iso-date <compiled-pcre> findall first
+] unit-test
+
+[ 3 ] [
+    "2003-10-09 1999-09-01 1514-10-20" iso-date <compiled-pcre> findall length
+] unit-test
+
+[ 5 ] [ "abcdef" "[a-e]" findall length ] unit-test
+
+[ 3 ] [ "foo bar baz" "foo|bar|baz" findall length ] unit-test
+
+[ 3 ] [ "örjan är åtta" "[åäö]" findall length ] unit-test
+
+[ 3 ] [ "ÅÄÖ" "\\p{Lu}" findall length ] unit-test
+
+[ 3 ] [ "foobar" "foo(?=bar)" findall first first second length ] unit-test
+
+[ { { { f ", " } } { { f ", " } } { { f "." } } } ] [
+    "Words, words, word." "\\W+" findall
+] unit-test
+
+[ { ", " ", " "." } ] [
+    "Words, words, word." "\\W+" findall [ first second ] map
+] unit-test
+
+: long-string ( -- x )
+    10000 [ CHAR: a CHAR: z [a,b] random ] "" replicate-as ;
+
+! Performance
+[ 0 ] [ long-string ".{0,15}foobar.{0,10}" findall length ] unit-test
+
+! Empty matches, corner case behaviour is copied from pcredemo.c
+[ { { { f "foo" } } { { f "" } } } ]
+[ "foo" ".*" findall ] unit-test
+
+[ { { { f "" } } { { f "" } } { { f "" } } } ]
+[ "foo" "B*" findall ] unit-test
+
+! Empty matches in strings with multi-byte characters are tricky.
+[ { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } ]
+[ "öööö" "x*" findall ] unit-test
+
+! Tests for matches?
+[ t ] [ "örjan" "örjan" matches? ] unit-test
+
+[ t ] [ "abcö" "\\p{Ll}{4}" matches? ] unit-test
+
+! Dotall mode, off by default
+[ f ] [ "." <compiled-pcre> PCRE_DOTALL has-option? ] unit-test
+[ t ] [ "(?s)." <compiled-pcre> PCRE_DOTALL has-option? ] unit-test
+
+[ f ] [ "\n" "." matches? ] unit-test
+[ t ] [ "\n" "(?s)." matches? ] unit-test
+
+! Caseless mode, off by default
+[ { f t } ] [
+    { "x" "(?i)x" } [ <compiled-pcre> PCRE_CASELESS has-option? ] map
+] unit-test
+
+! Backreferences
+[ { t f } ] [
+    { "response and responsibility" "sense and responsibility" }
+    [ "(sens|respons)e and \\1ibility" matches? ] map
+] unit-test
+
+[ { t t f } ] [
+    { "rah rah" "RAH RAH" "RAH rah" } [ "((?i)rah)\\s+\\1" matches? ] map
+] unit-test
+
+! Splitting
+[ { { "Words" "words" "word" } { "Words" "words" "word" } } ] [
+    "Words, words, word." { "\\W+" "[,. ]" } [ split ] with map
+] unit-test
+
+! Bigger tests
+[ t ] [
+    "http://factorcode.org/" http-get nip
+    "href=\"(?P<link>[^\"]+)\"" findall [ "link" of ] map sequence?
+] unit-test
+
+! Test that the regexp syntax works.
+[ t ] [ "1234abcd" regexp:R[ ^\d+\w+$] matches? ] unit-test
diff --git a/extra/pcre/pcre.factor b/extra/pcre/pcre.factor
new file mode 100644 (file)
index 0000000..008a04a
--- /dev/null
@@ -0,0 +1,101 @@
+USING:
+    accessors
+    alien.c-types alien.data alien.enums alien.strings
+    arrays
+    assocs
+    io.encodings.utf8 io.encodings.string
+    kernel
+    math
+    mirrors
+    pcre.ffi pcre.info pcre.utils
+    sequences sequences.generalizations
+    strings ;
+QUALIFIED: regexp
+IN: pcre
+
+ERROR: malformed-regexp expr error ;
+ERROR: pcre-error value ;
+
+TUPLE: compiled-pcre pcre extra nametable ;
+
+: default-opts ( -- opts )
+    PCRE_UTF8 PCRE_UCP bitor ;
+
+: (pcre) ( expr -- pcre err-message err-offset )
+    default-opts { c-string int } [ f pcre_compile ] with-out-parameters ;
+
+: <pcre> ( expr -- pcre )
+    dup (pcre) 2array swap [ 2nip ] [ malformed-regexp ] if* ;
+
+: exec ( pcre extra subject ofs opts -- count match-data )
+    [ dup length ] 2dip 30 int <c-array> 30 [ pcre_exec ] 2keep drop ;
+
+: <pcre-extra> ( pcre -- pcre-extra )
+    0 { c-string } [ pcre_study ] with-out-parameters drop ;
+
+: config ( what -- alien )
+    { int } [ pcre_config ] with-out-parameters ;
+
+! Finding stuff
+TUPLE: matcher pcre extra subject ofs exec-opts match ;
+
+: <matcher> ( subject compiled-pcre -- matcher )
+    [ utf8 encode ] dip [ pcre>> ] [ extra>> ] bi rot 0 0 f matcher boa ;
+
+! This handling of zero-length matches is taken from pcredemo.c
+: empty-match-opts ( -- opts )
+    PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED bitor ;
+
+: findnext ( matcher -- matcher'/f )
+    clone dup <mirror> values 6 firstn drop exec
+    over dup -1 < [ PCRE_ERRORS number>enum pcre-error ] when
+    -1 =
+    [
+        2drop dup exec-opts>> 0 =
+        [ drop f ]
+        [
+            dup [ subject>> ] [ ofs>> ] bi next-utf8-char
+            [ >>ofs 0 >>exec-opts findnext ] [ drop f ] if*
+        ] if
+    ]
+    [
+        [ 2array >>match ]
+        [
+            nip
+            [ first2 = [ empty-match-opts ] [ 0 ] if >>exec-opts ]
+            [ second >>ofs ] bi
+        ] 2bi
+    ] if ;
+
+! Result parsing
+: substring-list ( subject match-array count -- alien )
+    { void* } [ pcre_get_substring_list drop ] with-out-parameters ;
+
+: parse-match ( subject nametable match-data -- match )
+    swapd first2 swap [ substring-list ] keep void* <c-direct-array>
+    [ utf8 alien>string ] { } map-as [ of swap 2array ] with map-index ;
+
+! High-level
+: <compiled-pcre> ( expr -- compiled-pcre )
+    <pcre> dup <pcre-extra> 2dup name-table-entries compiled-pcre boa ;
+
+: has-option? ( compiled-pcre option -- ? )
+    [ pcre>> options ] dip bitand 0 > ;
+
+GENERIC: findall ( subject obj -- matches )
+
+M: compiled-pcre findall
+    [ <matcher> [ findnext ] follow [ match>> ] map harvest ]
+    [ nametable>> rot [ parse-match ] 2with map ] 2bi >array ;
+
+M: string findall
+    <compiled-pcre> findall ;
+
+M: regexp:regexp findall
+    raw>> findall ;
+
+: matches? ( subject obj -- ? )
+    dupd findall [ nip length 1 = ] [ ?first ?first ?last = ] 2bi and ;
+
+: split ( subject obj -- strings )
+    dupd findall [ first second ] map split-subseqs ;
diff --git a/extra/pcre/utils/utils-tests.factor b/extra/pcre/utils/utils-tests.factor
new file mode 100644 (file)
index 0000000..f53b823
--- /dev/null
@@ -0,0 +1,6 @@
+USING: pcre.utils tools.test ;
+IN: pcre.utils.tests
+
+[ { "Bords" "words" "word" } ] [
+    "Bords, words, word." { ", " ", " "." } split-subseqs
+] unit-test
diff --git a/extra/pcre/utils/utils.factor b/extra/pcre/utils/utils.factor
new file mode 100644 (file)
index 0000000..f5fea26
--- /dev/null
@@ -0,0 +1,21 @@
+USING: assocs fry kernel math mirrors sequences splitting strings ;
+IN: pcre.utils
+
+: replace-all ( seq subseqs new -- seq )
+    swapd '[ _ replace ] reduce ;
+
+: split-subseqs ( seq subseqs -- seqs )
+    dup first [ replace-all ] keep split-subseq [ >string ] map harvest ;
+
+: 2with ( param1 param2 obj quot -- obj curry )
+    [ -rot ] dip [ [ rot ] dip call ] 3curry ; inline
+
+: gen-array-addrs ( base size n -- addrs )
+    iota [ * + ] 2with map ;
+
+: utf8-start-byte? ( byte -- ? )
+    0xc0 bitand 0x80 = not ;
+
+: next-utf8-char ( byte-array pos -- pos' )
+    1 + 2dup swap ?nth
+    [ utf8-start-byte? [ nip ] [ next-utf8-char ] if ] [ 2drop f ] if* ;