1 ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: peg.ebnf kernel math.parser sequences assocs arrays fry math
4 combinators regexp.classes strings splitting peg locals accessors
8 : allowed-char? ( ch -- ? )
9 ".()|[*+?" member? not ;
13 : ensure-number ( n -- n )
14 [ bad-number ] unless* ;
16 :: at-error ( key assoc quot: ( key -- replacement ) -- value )
17 key assoc at* [ drop key quot call ] unless ; inline
19 ERROR: bad-class name ;
21 : name>class ( name -- class )
23 { "Lower" letter-class }
24 { "Upper" LETTER-class }
25 { "Alpha" Letter-class }
26 { "ASCII" ascii-class }
27 { "Digit" digit-class }
28 { "Alnum" alpha-class }
29 { "Punct" punctuation-class }
30 { "Graph" java-printable-class }
31 { "Print" java-printable-class }
32 { "Blank" non-newline-blank-class }
33 { "Cntrl" control-character-class }
34 { "XDigit" hex-digit-class }
35 { "Space" java-blank-class }
36 ! TODO: unicode-character-class
37 } [ bad-class ] at-error ;
39 : lookup-escape ( char -- ast )
41 { CHAR: t [ CHAR: \t ] }
42 { CHAR: n [ CHAR: \n ] }
43 { CHAR: r [ CHAR: \r ] }
44 { CHAR: f [ HEX: c ] }
45 { CHAR: a [ HEX: 7 ] }
46 { CHAR: e [ HEX: 1b ] }
47 { CHAR: \\ [ CHAR: \\ ] }
49 { CHAR: w [ c-identifier-class <primitive-class> ] }
50 { CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }
51 { CHAR: s [ java-blank-class <primitive-class> ] }
52 { CHAR: S [ java-blank-class <primitive-class> <not-class> ] }
53 { CHAR: d [ digit-class <primitive-class> ] }
54 { CHAR: D [ digit-class <primitive-class> <not-class> ] }
59 : options-assoc ( -- assoc )
61 { CHAR: i case-insensitive }
62 { CHAR: d unix-lines }
65 { CHAR: r reversed-regexp }
67 { CHAR: u unicode-case }
71 : ch>option ( ch -- singleton )
74 : option>ch ( option -- string )
75 options-assoc value-at ;
77 : parse-options ( on off -- options )
78 [ [ ch>option ] { } map-as ] bi@ <options> ;
80 : string>options ( string -- options )
81 "-" split1 parse-options ;
83 : options>string ( options -- string )
85 [ [ option>ch ] map ] bi@
86 [ "-" glue ] unless-empty
89 ! TODO: add syntax for various parenthized things,
90 ! add greedy and nongreedy forms of matching
91 ! (once it's all implemented)
95 CharacterInBracket = !("}") Character
97 QuotedCharacter = !("\\E") .
99 Escape = "p{" CharacterInBracket*:s "}" => [[ s >string name>class <primitive-class> ]]
100 | "P{" CharacterInBracket*:s "}" => [[ s >string name>class <primitive-class> <negation> ]]
101 | "Q" QuotedCharacter*:s "\\E" => [[ s <concatenation> ]]
102 | "u" Character:a Character:b Character:c Character:d
103 => [[ { a b c d } hex> ensure-number ]]
104 | "x" Character:a Character:b
105 => [[ { a b } hex> ensure-number ]]
106 | "0" Character:a Character:b Character:c
107 => [[ { a b c } oct> ensure-number ]]
108 | . => [[ lookup-escape ]]
110 EscapeSequence = "\\" Escape:e => [[ e ]]
112 Character = EscapeSequence | . ?[ allowed-char? ]?
114 AnyRangeCharacter = EscapeSequence | .
116 RangeCharacter = !("]") AnyRangeCharacter
118 Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <range> ]]
121 StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b <range> ]]
124 Ranges = StartRange:s Range*:r => [[ r s prefix ]]
126 CharClass = "^"?:n Ranges:e => [[ e n char-class ]]
130 Parenthized = "?:" Alternation:a => [[ a ]]
131 | "?" Options:on "-"? Options:off ":" Alternation:a
132 => [[ a on off parse-options <with-options> ]]
133 | "?#" [^)]* => [[ f ]]
134 | "?~" Alternation:a => [[ a <negation> ]]
135 | "?=" Alternation:a => [[ a <lookahead> ]]
136 | "?!" Alternation:a => [[ a <negation> <lookahead> ]]
137 | "?<=" Alternation:a => [[ a <lookbehind> ]]
138 | "?<!" Alternation:a => [[ a <negation> <lookbehind> ]]
141 Element = "(" Parenthized:p ")" => [[ p ]]
142 | "[" CharClass:r "]" => [[ r ]]
143 | ".":d => [[ any-char <primitive-class> ]]
146 Number = (!(","|"}").)* => [[ string>number ensure-number ]]
148 Times = "," Number:n "}" => [[ 0 n <from-to> ]]
149 | Number:n ",}" => [[ n <at-least> ]]
150 | Number:n "}" => [[ n n <from-to> ]]
151 | "}" => [[ bad-number ]]
152 | Number:n "," Number:m "}" => [[ n m <from-to> ]]
154 Repeated = Element:e "{" Times:t => [[ e t <times> ]]
155 | Element:e "?" => [[ e <maybe> ]]
156 | Element:e "*" => [[ e <star> ]]
157 | Element:e "+" => [[ e <plus> ]]
160 Concatenation = Repeated*:r => [[ r sift <concatenation> ]]
162 Alternation = Concatenation:c ("|" Concatenation)*:a
163 => [[ a empty? [ c ] [ a values c prefix <alternation> ] if ]]
167 Main = Alternation End