+++ /dev/null
-USING: parser-combinators.regexp tools.test kernel ;
-IN: parser-combinators.regexp.tests
-
-[ f ] [ "b" "a*" f <regexp> matches? ] unit-test
-[ t ] [ "" "a*" f <regexp> matches? ] unit-test
-[ t ] [ "a" "a*" f <regexp> matches? ] unit-test
-[ t ] [ "aaaaaaa" "a*" f <regexp> matches? ] unit-test
-[ f ] [ "ab" "a*" f <regexp> matches? ] unit-test
-
-[ t ] [ "abc" "abc" f <regexp> matches? ] unit-test
-[ t ] [ "a" "a|b|c" f <regexp> matches? ] unit-test
-[ t ] [ "b" "a|b|c" f <regexp> matches? ] unit-test
-[ t ] [ "c" "a|b|c" f <regexp> matches? ] unit-test
-[ f ] [ "c" "d|e|f" f <regexp> matches? ] unit-test
-
-[ f ] [ "aa" "a|b|c" f <regexp> matches? ] unit-test
-[ f ] [ "bb" "a|b|c" f <regexp> matches? ] unit-test
-[ f ] [ "cc" "a|b|c" f <regexp> matches? ] unit-test
-[ f ] [ "cc" "d|e|f" f <regexp> matches? ] unit-test
-
-[ f ] [ "" "a+" f <regexp> matches? ] unit-test
-[ t ] [ "a" "a+" f <regexp> matches? ] unit-test
-[ t ] [ "aa" "a+" f <regexp> matches? ] unit-test
-
-[ t ] [ "" "a?" f <regexp> matches? ] unit-test
-[ t ] [ "a" "a?" f <regexp> matches? ] unit-test
-[ f ] [ "aa" "a?" f <regexp> matches? ] unit-test
-
-[ f ] [ "" "." f <regexp> matches? ] unit-test
-[ t ] [ "a" "." f <regexp> matches? ] unit-test
-[ t ] [ "." "." f <regexp> matches? ] unit-test
-! [ f ] [ "\n" "." f <regexp> matches? ] unit-test
-
-[ f ] [ "" ".+" f <regexp> matches? ] unit-test
-[ t ] [ "a" ".+" f <regexp> matches? ] unit-test
-[ t ] [ "ab" ".+" f <regexp> matches? ] unit-test
-
-[ t ] [ "" "a|b*|c+|d?" f <regexp> matches? ] unit-test
-[ t ] [ "a" "a|b*|c+|d?" f <regexp> matches? ] unit-test
-[ t ] [ "c" "a|b*|c+|d?" f <regexp> matches? ] unit-test
-[ t ] [ "cc" "a|b*|c+|d?" f <regexp> matches? ] unit-test
-[ f ] [ "ccd" "a|b*|c+|d?" f <regexp> matches? ] unit-test
-[ t ] [ "d" "a|b*|c+|d?" f <regexp> matches? ] unit-test
-
-[ t ] [ "foo" "foo|bar" f <regexp> matches? ] unit-test
-[ t ] [ "bar" "foo|bar" f <regexp> matches? ] unit-test
-[ f ] [ "foobar" "foo|bar" f <regexp> matches? ] unit-test
-
-[ f ] [ "" "(a)" f <regexp> matches? ] unit-test
-[ t ] [ "a" "(a)" f <regexp> matches? ] unit-test
-[ f ] [ "aa" "(a)" f <regexp> matches? ] unit-test
-[ t ] [ "aa" "(a*)" f <regexp> matches? ] unit-test
-
-[ f ] [ "aababaaabbac" "(a|b)+" f <regexp> matches? ] unit-test
-[ t ] [ "ababaaabba" "(a|b)+" f <regexp> matches? ] unit-test
-
-[ f ] [ "" "a{1}" f <regexp> matches? ] unit-test
-[ t ] [ "a" "a{1}" f <regexp> matches? ] unit-test
-[ f ] [ "aa" "a{1}" f <regexp> matches? ] unit-test
-
-[ f ] [ "a" "a{2,}" f <regexp> matches? ] unit-test
-[ t ] [ "aaa" "a{2,}" f <regexp> matches? ] unit-test
-[ t ] [ "aaaa" "a{2,}" f <regexp> matches? ] unit-test
-[ t ] [ "aaaaa" "a{2,}" f <regexp> matches? ] unit-test
-
-[ t ] [ "" "a{,2}" f <regexp> matches? ] unit-test
-[ t ] [ "a" "a{,2}" f <regexp> matches? ] unit-test
-[ t ] [ "aa" "a{,2}" f <regexp> matches? ] unit-test
-[ f ] [ "aaa" "a{,2}" f <regexp> matches? ] unit-test
-[ f ] [ "aaaa" "a{,2}" f <regexp> matches? ] unit-test
-[ f ] [ "aaaaa" "a{,2}" f <regexp> matches? ] unit-test
-
-[ f ] [ "" "a{1,3}" f <regexp> matches? ] unit-test
-[ t ] [ "a" "a{1,3}" f <regexp> matches? ] unit-test
-[ t ] [ "aa" "a{1,3}" f <regexp> matches? ] unit-test
-[ t ] [ "aaa" "a{1,3}" f <regexp> matches? ] unit-test
-[ f ] [ "aaaa" "a{1,3}" f <regexp> matches? ] unit-test
-
-[ f ] [ "" "[a]" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[a]" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[abc]" f <regexp> matches? ] unit-test
-[ f ] [ "b" "[a]" f <regexp> matches? ] unit-test
-[ f ] [ "d" "[abc]" f <regexp> matches? ] unit-test
-[ t ] [ "ab" "[abc]{1,2}" f <regexp> matches? ] unit-test
-[ f ] [ "abc" "[abc]{1,2}" f <regexp> matches? ] unit-test
-
-[ f ] [ "" "[^a]" f <regexp> matches? ] unit-test
-[ f ] [ "a" "[^a]" f <regexp> matches? ] unit-test
-[ f ] [ "a" "[^abc]" f <regexp> matches? ] unit-test
-[ t ] [ "b" "[^a]" f <regexp> matches? ] unit-test
-[ t ] [ "d" "[^abc]" f <regexp> matches? ] unit-test
-[ f ] [ "ab" "[^abc]{1,2}" f <regexp> matches? ] unit-test
-[ f ] [ "abc" "[^abc]{1,2}" f <regexp> matches? ] unit-test
-
-[ t ] [ "]" "[]]" f <regexp> matches? ] unit-test
-[ f ] [ "]" "[^]]" f <regexp> matches? ] unit-test
-
-! [ "^" "[^]" f <regexp> matches? ] must-fail
-[ t ] [ "^" "[]^]" f <regexp> matches? ] unit-test
-[ t ] [ "]" "[]^]" f <regexp> matches? ] unit-test
-
-[ t ] [ "[" "[[]" f <regexp> matches? ] unit-test
-[ f ] [ "^" "[^^]" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[^^]" f <regexp> matches? ] unit-test
-
-[ t ] [ "-" "[-]" f <regexp> matches? ] unit-test
-[ f ] [ "a" "[-]" f <regexp> matches? ] unit-test
-[ f ] [ "-" "[^-]" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[^-]" f <regexp> matches? ] unit-test
-
-[ t ] [ "-" "[-a]" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[-a]" f <regexp> matches? ] unit-test
-[ t ] [ "-" "[a-]" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[a-]" f <regexp> matches? ] unit-test
-[ f ] [ "b" "[a-]" f <regexp> matches? ] unit-test
-[ f ] [ "-" "[^-]" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[^-]" f <regexp> matches? ] unit-test
-
-[ f ] [ "-" "[a-c]" f <regexp> matches? ] unit-test
-[ t ] [ "-" "[^a-c]" f <regexp> matches? ] unit-test
-[ t ] [ "b" "[a-c]" f <regexp> matches? ] unit-test
-[ f ] [ "b" "[^a-c]" f <regexp> matches? ] unit-test
-
-[ t ] [ "-" "[a-c-]" f <regexp> matches? ] unit-test
-[ f ] [ "-" "[^a-c-]" f <regexp> matches? ] unit-test
-
-[ t ] [ "\\" "[\\\\]" f <regexp> matches? ] unit-test
-[ f ] [ "a" "[\\\\]" f <regexp> matches? ] unit-test
-[ f ] [ "\\" "[^\\\\]" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[^\\\\]" f <regexp> matches? ] unit-test
-
-[ t ] [ "0" "[\\d]" f <regexp> matches? ] unit-test
-[ f ] [ "a" "[\\d]" f <regexp> matches? ] unit-test
-[ f ] [ "0" "[^\\d]" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[^\\d]" f <regexp> matches? ] unit-test
-
-[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" f <regexp> matches? ] unit-test
-[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" f <regexp> matches? ] unit-test
-
-[ t ] [ "1000" "\\d{4,6}" f <regexp> matches? ] unit-test
-[ t ] [ "1000" "[0-9]{4,6}" f <regexp> matches? ] unit-test
-
-[ t ] [ "abc" "\\p{Lower}{3}" f <regexp> matches? ] unit-test
-[ f ] [ "ABC" "\\p{Lower}{3}" f <regexp> matches? ] unit-test
-[ t ] [ "ABC" "\\p{Upper}{3}" f <regexp> matches? ] unit-test
-[ f ] [ "abc" "\\p{Upper}{3}" f <regexp> matches? ] unit-test
-
-[ f ] [ "abc" "[\\p{Upper}]{3}" f <regexp> matches? ] unit-test
-[ t ] [ "ABC" "[\\p{Upper}]{3}" f <regexp> matches? ] unit-test
-
-[ t ] [ "" "\\Q\\E" f <regexp> matches? ] unit-test
-[ f ] [ "a" "\\Q\\E" f <regexp> matches? ] unit-test
-[ t ] [ "|*+" "\\Q|*+\\E" f <regexp> matches? ] unit-test
-[ f ] [ "abc" "\\Q|*+\\E" f <regexp> matches? ] unit-test
-
-[ t ] [ "S" "\\0123" f <regexp> matches? ] unit-test
-[ t ] [ "SXY" "\\0123XY" f <regexp> matches? ] unit-test
-[ t ] [ "x" "\\x78" f <regexp> matches? ] unit-test
-[ f ] [ "y" "\\x78" f <regexp> matches? ] unit-test
-[ t ] [ "x" "\\u000078" f <regexp> matches? ] unit-test
-[ f ] [ "y" "\\u000078" f <regexp> matches? ] unit-test
-
-[ t ] [ "ab" "a+b" f <regexp> matches? ] unit-test
-[ f ] [ "b" "a+b" f <regexp> matches? ] unit-test
-[ t ] [ "aab" "a+b" f <regexp> matches? ] unit-test
-[ f ] [ "abb" "a+b" f <regexp> matches? ] unit-test
-
-[ t ] [ "abbbb" "ab*" f <regexp> matches? ] unit-test
-[ t ] [ "a" "ab*" f <regexp> matches? ] unit-test
-[ f ] [ "abab" "ab*" f <regexp> matches? ] unit-test
-
-[ f ] [ "x" "\\." f <regexp> matches? ] unit-test
-[ t ] [ "." "\\." f <regexp> matches? ] unit-test
-
-[ t ] [ "aaaab" "a+ab" f <regexp> matches? ] unit-test
-[ f ] [ "aaaxb" "a+ab" f <regexp> matches? ] unit-test
-[ t ] [ "aaacb" "a+cb" f <regexp> matches? ] unit-test
-[ f ] [ "aaaab" "a++ab" f <regexp> matches? ] unit-test
-[ t ] [ "aaacb" "a++cb" f <regexp> matches? ] unit-test
-
-[ 3 ] [ "aaacb" "a*" f <regexp> match-head ] unit-test
-[ 1 ] [ "aaacb" "a+?" f <regexp> match-head ] unit-test
-[ 2 ] [ "aaacb" "aa?" f <regexp> match-head ] unit-test
-[ 1 ] [ "aaacb" "aa??" f <regexp> match-head ] unit-test
-[ 3 ] [ "aacb" "aa?c" f <regexp> match-head ] unit-test
-[ 3 ] [ "aacb" "aa??c" f <regexp> match-head ] unit-test
-
-[ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
-[ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
-[ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
-[ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
-[ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
-[ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
-[ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
-[ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
-
-[ ] [
- "(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
- f <regexp> drop
-] unit-test
-
-[ t ] [ "fxxbar" "(?!foo).{3}bar" f <regexp> matches? ] unit-test
-[ f ] [ "foobar" "(?!foo).{3}bar" f <regexp> matches? ] unit-test
-
-[ 3 ] [ "foobar" "foo(?=bar)" f <regexp> match-head ] unit-test
-[ f ] [ "foobxr" "foo(?=bar)" f <regexp> match-head ] unit-test
-
-[ f ] [ "foobxr" "foo\\z" f <regexp> match-head ] unit-test
-[ 3 ] [ "foo" "foo\\z" f <regexp> match-head ] unit-test
-
-[ 3 ] [ "foo bar" "foo\\b" f <regexp> match-head ] unit-test
-[ f ] [ "fooxbar" "foo\\b" f <regexp> matches? ] unit-test
-[ t ] [ "foo" "foo\\b" f <regexp> matches? ] unit-test
-[ t ] [ "foo bar" "foo\\b bar" f <regexp> matches? ] unit-test
-[ f ] [ "fooxbar" "foo\\bxbar" f <regexp> matches? ] unit-test
-[ f ] [ "foo" "foo\\bbar" f <regexp> matches? ] unit-test
-
-[ f ] [ "foo bar" "foo\\B" f <regexp> matches? ] unit-test
-[ 3 ] [ "fooxbar" "foo\\B" f <regexp> match-head ] unit-test
-[ t ] [ "foo" "foo\\B" f <regexp> matches? ] unit-test
-[ f ] [ "foo bar" "foo\\B bar" f <regexp> matches? ] unit-test
-[ t ] [ "fooxbar" "foo\\Bxbar" f <regexp> matches? ] unit-test
-[ f ] [ "foo" "foo\\Bbar" f <regexp> matches? ] unit-test
-
-[ t ] [ "s@f" "[a-z.-]@[a-z]" f <regexp> matches? ] unit-test
-[ f ] [ "a" "[a-z.-]@[a-z]" f <regexp> matches? ] unit-test
-[ t ] [ ".o" "\\.[a-z]" f <regexp> matches? ] unit-test
-
-! Bug in parsing word
-[ t ] [
- "a"
- R' a'
- matches?
-] unit-test
+++ /dev/null
-USING: arrays combinators kernel lists math math.parser
-namespaces parser lexer parser-combinators
-parser-combinators.simple promises quotations sequences strings
-math.order assocs prettyprint.backend prettyprint.custom memoize
-ascii unicode.categories combinators.short-circuit
-accessors make io ;
-IN: parser-combinators.regexp
-
-<PRIVATE
-
-SYMBOL: ignore-case?
-
-: char=-quot ( ch -- quot )
- ignore-case? get
- [ ch>upper [ swap ch>upper = ] ] [ [ = ] ] if
- curry ;
-
-: char-between?-quot ( ch1 ch2 -- quot )
- ignore-case? get
- [ [ ch>upper ] bi@ [ [ ch>upper ] 2dip between? ] ]
- [ [ between? ] ]
- if 2curry ;
-
-: <@literal ( parser obj -- action ) [ nip ] curry <@ ;
-
-: <@delay ( parser quot -- action ) [ curry ] curry <@ ;
-
-PRIVATE>
-
-: ascii? ( n -- ? )
- 0 HEX: 7f between? ;
-
-: octal-digit? ( n -- ? )
- CHAR: 0 CHAR: 7 between? ;
-
-: decimal-digit? ( n -- ? )
- CHAR: 0 CHAR: 9 between? ;
-
-: hex-digit? ( n -- ? )
- dup decimal-digit?
- over CHAR: a CHAR: f between? or
- swap CHAR: A CHAR: F between? or ;
-
-: control-char? ( n -- ? )
- dup 0 HEX: 1f between?
- swap HEX: 7f = or ;
-
-: punct? ( n -- ? )
- "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
-
-: c-identifier-char? ( ch -- ? )
- dup alpha? swap CHAR: _ = or ;
-
-: java-blank? ( n -- ? )
- {
- CHAR: \s
- CHAR: \t CHAR: \n CHAR: \r
- HEX: c HEX: 7 HEX: 1b
- } member? ;
-
-: java-printable? ( n -- ? )
- dup alpha? swap punct? or ;
-
-: 'ordinary-char' ( -- parser )
- [ "\\^*+?|(){}[$" member? not ] satisfy
- [ char=-quot ] <@ ;
-
-: 'octal-digit' ( -- parser ) [ octal-digit? ] satisfy ;
-
-: 'octal' ( -- parser )
- "0" token 'octal-digit' 1 3 from-m-to-n &>
- [ oct> ] <@ ;
-
-: 'hex-digit' ( -- parser ) [ hex-digit? ] satisfy ;
-
-: 'hex' ( -- parser )
- "x" token 'hex-digit' 2 exactly-n &>
- "u" token 'hex-digit' 6 exactly-n &> <|>
- [ hex> ] <@ ;
-
-: satisfy-tokens ( assoc -- parser )
- [ [ token ] dip <@literal ] { } assoc>map <or-parser> ;
-
-: 'simple-escape-char' ( -- parser )
- {
- { "\\" CHAR: \\ }
- { "t" CHAR: \t }
- { "n" CHAR: \n }
- { "r" CHAR: \r }
- { "f" HEX: c }
- { "a" HEX: 7 }
- { "e" HEX: 1b }
- } [ char=-quot ] assoc-map satisfy-tokens ;
-
-: 'predefined-char-class' ( -- parser )
- {
- { "d" [ digit? ] }
- { "D" [ digit? not ] }
- { "s" [ java-blank? ] }
- { "S" [ java-blank? not ] }
- { "w" [ c-identifier-char? ] }
- { "W" [ c-identifier-char? not ] }
- } satisfy-tokens ;
-
-: 'posix-character-class' ( -- parser )
- {
- { "Lower" [ letter? ] }
- { "Upper" [ LETTER? ] }
- { "ASCII" [ ascii? ] }
- { "Alpha" [ Letter? ] }
- { "Digit" [ digit? ] }
- { "Alnum" [ alpha? ] }
- { "Punct" [ punct? ] }
- { "Graph" [ java-printable? ] }
- { "Print" [ java-printable? ] }
- { "Blank" [ " \t" member? ] }
- { "Cntrl" [ control-char? ] }
- { "XDigit" [ hex-digit? ] }
- { "Space" [ java-blank? ] }
- } satisfy-tokens "p{" "}" surrounded-by ;
-
-: 'simple-escape' ( -- parser )
- 'octal'
- 'hex' <|>
- "c" token [ LETTER? ] satisfy &> <|>
- any-char-parser <|>
- [ char=-quot ] <@ ;
-
-: 'escape' ( -- parser )
- "\\" token
- 'simple-escape-char'
- 'predefined-char-class' <|>
- 'posix-character-class' <|>
- 'simple-escape' <|> &> ;
-
-: 'any-char' ( -- parser )
- "." token [ drop t ] <@literal ;
-
-: 'char' ( -- parser )
- 'any-char' 'escape' 'ordinary-char' <|> <|> [ satisfy ] <@ ;
-
-DEFER: 'regexp'
-
-TUPLE: group-result str ;
-
-C: <group-result> group-result
-
-: 'non-capturing-group' ( -- parser )
- "?:" token 'regexp' &> ;
-
-: 'positive-lookahead-group' ( -- parser )
- "?=" token 'regexp' &> [ ensure ] <@ ;
-
-: 'negative-lookahead-group' ( -- parser )
- "?!" token 'regexp' &> [ ensure-not ] <@ ;
-
-: 'simple-group' ( -- parser )
- 'regexp' [ [ <group-result> ] <@ ] <@ ;
-
-: 'group' ( -- parser )
- 'non-capturing-group'
- 'positive-lookahead-group'
- 'negative-lookahead-group'
- 'simple-group' <|> <|> <|>
- "(" ")" surrounded-by ;
-
-: 'range' ( -- parser )
- [ CHAR: ] = not ] satisfy "-" token <&
- [ CHAR: ] = not ] satisfy <&>
- [ first2 char-between?-quot ] <@ ;
-
-: 'character-class-term' ( -- parser )
- 'range'
- 'escape' <|>
- [ "\\]" member? not ] satisfy [ char=-quot ] <@ <|> ;
-
-: 'positive-character-class' ( -- parser )
- "]" token [ CHAR: ] = ] <@literal 'character-class-term' <*> <&:>
- 'character-class-term' <+> <|>
- [ [ 1|| ] curry ] <@ ;
-
-: 'negative-character-class' ( -- parser )
- "^" token 'positive-character-class' &>
- [ [ not ] append ] <@ ;
-
-: 'character-class' ( -- parser )
- 'negative-character-class' 'positive-character-class' <|>
- "[" "]" surrounded-by [ satisfy ] <@ ;
-
-: 'escaped-seq' ( -- parser )
- any-char-parser <*>
- [ ignore-case? get <token-parser> ] <@
- "\\Q" "\\E" surrounded-by ;
-
-: 'break' ( quot -- parser )
- satisfy ensure epsilon just <|> ;
-
-: 'break-escape' ( -- parser )
- "$" token [ "\r\n" member? ] 'break' <@literal
- "\\b" token [ blank? ] 'break' <@literal <|>
- "\\B" token [ blank? not ] 'break' <@literal <|>
- "\\z" token epsilon just <@literal <|> ;
-
-: 'simple' ( -- parser )
- 'escaped-seq'
- 'break-escape' <|>
- 'group' <|>
- 'character-class' <|>
- 'char' <|> ;
-
-: 'exactly-n' ( -- parser )
- 'integer' [ exactly-n ] <@delay ;
-
-: 'at-least-n' ( -- parser )
- 'integer' "," token <& [ at-least-n ] <@delay ;
-
-: 'at-most-n' ( -- parser )
- "," token 'integer' &> [ at-most-n ] <@delay ;
-
-: 'from-m-to-n' ( -- parser )
- 'integer' "," token <& 'integer' <&> [ first2 from-m-to-n ] <@delay ;
-
-: 'greedy-interval' ( -- parser )
- 'exactly-n' 'at-least-n' <|> 'at-most-n' <|> 'from-m-to-n' <|> ;
-
-: 'interval' ( -- parser )
- 'greedy-interval'
- 'greedy-interval' "?" token <& [ "reluctant {}" print ] <@ <|>
- 'greedy-interval' "+" token <& [ "possessive {}" print ] <@ <|>
- "{" "}" surrounded-by ;
-
-: 'repetition' ( -- parser )
- ! Posessive
- "*+" token [ <!*> ] <@literal
- "++" token [ <!+> ] <@literal <|>
- "?+" token [ <!?> ] <@literal <|>
- ! Reluctant
- "*?" token [ <(*)> ] <@literal <|>
- "+?" token [ <(+)> ] <@literal <|>
- "??" token [ <(?)> ] <@literal <|>
- ! Greedy
- "*" token [ <*> ] <@literal <|>
- "+" token [ <+> ] <@literal <|>
- "?" token [ <?> ] <@literal <|> ;
-
-: 'dummy' ( -- parser )
- epsilon [ ] <@literal ;
-
-MEMO: 'term' ( -- parser )
- 'simple'
- 'repetition' 'interval' 'dummy' <|> <|> <&> [ first2 call ] <@
- <!+> [ <and-parser> ] <@ ;
-
-LAZY: 'regexp' ( -- parser )
- 'term' "|" token nonempty-list-of [ <or-parser> ] <@ ;
-! "^" token 'term' "|" token nonempty-list-of [ <or-parser> ] <@
-! &> [ "caret" print ] <@ <|>
-! 'term' "|" token nonempty-list-of [ <or-parser> ] <@
-! "$" token <& [ "dollar" print ] <@ <|>
-! "^" token 'term' "|" token nonempty-list-of [ <or-parser> ] <@ &>
-! "$" token [ "caret dollar" print ] <@ <& <|> ;
-
-TUPLE: regexp source parser ignore-case? ;
-
-: <regexp> ( string ignore-case? -- regexp )
- [
- ignore-case? [
- dup 'regexp' just parse-1
- ] with-variable
- ] keep regexp boa ;
-
-: do-ignore-case ( string regexp -- string regexp )
- dup ignore-case?>> [ [ >upper ] dip ] when ;
-
-: matches? ( string regexp -- ? )
- do-ignore-case parser>> just parse nil? not ;
-
-: match-head ( string regexp -- end )
- do-ignore-case parser>> parse dup nil?
- [ drop f ] [ car unparsed>> from>> ] if ;
-
-! Literal syntax for regexps
-: parse-options ( string -- ? )
- #! Lame
- {
- { "" [ f ] }
- { "i" [ t ] }
- } case ;
-
-: parse-regexp ( accum end -- accum )
- lexer get dup skip-blank
- [ [ index-from dup 1+ swap ] 2keep swapd subseq swap ] change-lexer-column
- lexer get dup still-parsing-line?
- [ (parse-token) parse-options ] [ drop f ] if
- <regexp> parsed ;
-
-: R! CHAR: ! parse-regexp ; parsing
-: R" CHAR: " parse-regexp ; parsing
-: R# CHAR: # parse-regexp ; parsing
-: R' CHAR: ' parse-regexp ; parsing
-: R( CHAR: ) parse-regexp ; parsing
-: R/ CHAR: / parse-regexp ; parsing
-: R@ CHAR: @ parse-regexp ; parsing
-: R[ CHAR: ] parse-regexp ; parsing
-: R` CHAR: ` parse-regexp ; parsing
-: R{ CHAR: } parse-regexp ; parsing
-: R| CHAR: | parse-regexp ; parsing
-
-: find-regexp-syntax ( string -- prefix suffix )
- {
- { "R/ " "/" }
- { "R! " "!" }
- { "R\" " "\"" }
- { "R# " "#" }
- { "R' " "'" }
- { "R( " ")" }
- { "R@ " "@" }
- { "R[ " "]" }
- { "R` " "`" }
- { "R{ " "}" }
- { "R| " "|" }
- } swap [ subseq? not nip ] curry assoc-find drop ;
-
-M: regexp pprint*
- [
- dup source>>
- dup find-regexp-syntax swap % swap % %
- dup ignore-case?>> [ "i" % ] when
- ] "" make
- swap present-text ;