! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
-! See http://factorcode.org/license.txt for BSD license.
-USING: peg.ebnf kernel math.parser sequences assocs arrays fry math
-combinators regexp.classes strings splitting peg locals accessors
-regexp.ast unicode.case unicode.script.private unicode.categories
-memoize interval-maps sets unicode.data combinators.short-circuit ;
+! See https://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs combinators
+combinators.short-circuit interval-maps kernel math.parser
+multiline peg.ebnf regexp.ast regexp.classes sequences sets
+splitting strings unicode unicode.data unicode.script ;
IN: regexp.parser
: allowed-char? ( ch -- ? )
! Normalizes a name for table lookup: case-folds str, then drops every
! space, tab and underscore character from the result.
: simple ( str -- simple )
! Alternatively, first collation key level?
- >case-fold [ " \t_" member? not ] filter ;
+ >case-fold [ " \t_" member? ] reject ;
! Builds a hashtable keyed by the simplified form (see `simple`) of each
! element of seq; the value stored is the original, unsimplified element.
: simple-table ( seq -- table )
[ [ simple ] keep ] H{ } map>assoc ;
simple-category-table at <category-class>
] }
{ [ "script=" ?head ] [
- dup simple-script-table at
+ [ simple-script-table at ]
[ <script-class> ]
[ "script=" prepend bad-class ] ?if
] }
} cond ;
! Resolves name with parse-unicode-class. A non-f result is returned as
! the class; on f the original name is handed to bad-class instead.
! NOTE(review): bad-class is defined outside this view; presumably it throws.
: unicode-class ( name -- class )
- dup parse-unicode-class [ ] [ bad-class ] ?if ;
+ [ parse-unicode-class ] [ bad-class ] ?unless ;
: name>class ( name -- class )
>string simple {
: lookup-escape ( char -- ast )
{
- { CHAR: t [ CHAR: \t ] }
+ { CHAR: a [ CHAR: \a ] }
+ { CHAR: e [ CHAR: \e ] }
+ { CHAR: f [ CHAR: \f ] }
{ CHAR: n [ CHAR: \n ] }
{ CHAR: r [ CHAR: \r ] }
- { CHAR: f [ HEX: c ] }
- { CHAR: a [ HEX: 7 ] }
- { CHAR: e [ HEX: 1b ] }
- { CHAR: \\ [ CHAR: \\ ] }
+ { CHAR: t [ CHAR: \t ] }
+ { CHAR: v [ CHAR: \v ] }
+ { CHAR: 0 [ CHAR: \0 ] }
{ CHAR: w [ c-identifier-class <primitive-class> ] }
{ CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }
! Error raised by ch>option for a character that has no entry in options-assoc.
ERROR: nonexistent-option name ;
! Maps an option character to its option singleton via options-assoc.
! A character with no entry raises nonexistent-option carrying that character.
: ch>option ( ch -- singleton )
- dup options-assoc at [ ] [ nonexistent-option ] ?if ;
+ [ options-assoc at ] [ nonexistent-option ] ?unless ;
! Reverse lookup in options-assoc: returns the key whose value is option
! (f when no entry has that value).
! NOTE(review): the effect names the output `string`, but ch>option looks
! the same assoc up by character, so the result is presumably a character
! -- confirm against the definition of options-assoc.
: option>ch ( option -- string )
options-assoc value-at ;
! Splits string at the first "-" and passes both pieces to parse-options.
! The piece after the dash is f when the string contains no dash.
! NOTE(review): presumably the first piece lists options to turn on and the
! second those to turn off -- parse-options is outside this view; confirm.
: string>options ( string -- options )
"-" split1 parse-options ;
-
+
: options>string ( options -- string )
[ on>> ] [ off>> ] bi
[ [ option>ch ] map ] bi@
! add greedy and nongreedy forms of matching
! (once it's all implemented)
-EBNF: parse-regexp
+EBNF: parse-regexp [=[
CharacterInBracket = !("}") Character
QuotedCharacter = !("\\E") .
Escape = "p{" CharacterInBracket*:s "}" => [[ s name>class <primitive-class> ]]
- | "P{" CharacterInBracket*:s "}" => [[ s name>class <primitive-class> <negation> ]]
+ | "P{" CharacterInBracket*:s "}" => [[ s name>class <primitive-class> <not-class> ]]
| "Q" QuotedCharacter*:s "\\E" => [[ s <concatenation> ]]
| "u" Character:a Character:b Character:c Character:d
=> [[ { a b c d } hex> ensure-number ]]
EscapeSequence = "\\" Escape:e => [[ e ]]
Character = EscapeSequence
- | "$" => [[ $ <tagged-epsilon> ]]
- | "^" => [[ ^ <tagged-epsilon> ]]
+ | "$" => [[ $crlf <tagged-epsilon> ]]
+ | "^" => [[ ^crlf <tagged-epsilon> ]]
| . ?[ allowed-char? ]?
AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)
End = !(.)
Main = Alternation End
-;EBNF
+]=]