extra/parser-combinators/parser-combinators.factor

   1 ! Copyright (C) 2004 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: lists lists.lazy promises kernel sequences strings math
   4 arrays splitting quotations combinators namespaces locals
   5 unicode.case unicode.categories sequences.deep accessors ;
   6 IN: parser-combinators
   7
   8 ! Parser combinator protocol
   9 GENERIC: parse ( input parser -- list )
  10
  11 M: promise parse ( input parser -- list )
  12     force parse ;
  13
  14 TUPLE: parse-result parsed unparsed ;
  15
  16 ERROR: cannot-parse input ;
  17
  18 : parse-1 ( input parser -- result )
  19     dupd parse dup nil? [
  20         swap cannot-parse
  21     ] [
  22         nip car parsed>>
  23     ] if ;
  24
  25 C: <parse-result> parse-result
  26
  27 : <parse-results> ( parsed unparsed -- list )
  28     <parse-result> 1list ;
  29
  30 : parse-result-parsed-slice ( parse-result -- slice )
  31     dup parsed>> empty? [
  32         unparsed>> 0 0 rot <slice>
  33     ] [
  34         dup unparsed>>
  35         dup from>> [ rot parsed>> length - ] keep
  36         rot seq>> <slice>
  37     ] if ;
  38
  39 : string= ( str1 str2 ignore-case -- ? )
  40     [ [ >upper ] bi@ ] when sequence= ;
  41
  42 : string-head? ( str head ignore-case -- ? )
  43     2over shorter? [
  44         3drop f
  45     ] [
  46         [ [ length head-slice ] keep ] dip string=
  47     ] if ;
  48
  49 : ?string-head ( str head ignore-case -- newstr ? )
  50     [ 2dup ] dip string-head?
  51     [ length tail-slice t ] [ drop f ] if ;
  52
  53 TUPLE: token-parser string ignore-case? ;
  54
  55 C: <token-parser> token-parser
  56
  57 : token ( string -- parser ) f <token-parser> ;
  58
  59 : case-insensitive-token ( string -- parser ) t <token-parser> ;
  60
  61 M:: token-parser parse ( input parser -- list )
  62     parser string>> :> str
  63     parser ignore-case?>> :> case?
  64
  65     str input str case? ?string-head
  66     [ <parse-results> ] [ 2drop nil ] if ;
  67
  68 : 1token ( n -- parser ) 1string token ;
  69
  70 TUPLE: satisfy-parser quot ;
  71
  72 C: satisfy satisfy-parser
  73
  74 M: satisfy-parser parse ( input parser -- list )
  75     ! A parser that succeeds if the predicate,
  76     ! when passed the first character in the input, returns
  77     ! true.
  78     over empty? [
  79         2drop nil
  80     ] [
  81         quot>> [ unclip-slice dup ] dip call( char -- ? )
  82         [ swap <parse-results> ] [ 2drop nil ] if
  83     ] if ;
  84
  85 LAZY: any-char-parser ( -- parser )
  86     [ drop t ] satisfy ;
  87
  88 TUPLE: epsilon-parser ;
  89
  90 C: epsilon epsilon-parser
  91
  92 M: epsilon-parser parse ( input parser -- list )
  93     ! A parser that parses the empty string. It
  94     ! does not consume any input and always returns
  95     ! an empty list as the parse tree with the
  96     ! unmodified input.
  97     drop "" swap <parse-results> ;
  98
  99 TUPLE: succeed-parser result ;
 100
 101 C: succeed succeed-parser
 102
 103 M: succeed-parser parse ( input parser -- list )
 104     ! A parser that always returns 'result' as a
 105     ! successful parse with no input consumed.
 106     result>> swap <parse-results> ;
 107
 108 TUPLE: fail-parser ;
 109
 110 C: fail fail-parser
 111
 112 M: fail-parser parse ( input parser -- list )
 113     ! A parser that always fails and returns
 114     ! an empty list of successes.
 115     2drop nil ;
 116
 117 TUPLE: ensure-parser test ;
 118
 119 : ensure ( parser -- ensure )
 120     ensure-parser boa ;
 121
 122 M: ensure-parser parse ( input parser -- list )
 123     2dup test>> parse nil?
 124     [ 2drop nil ] [ drop t swap <parse-results> ] if ;
 125
 126 TUPLE: ensure-not-parser test ;
 127
 128 : ensure-not ( parser -- ensure )
 129     ensure-not-parser boa ;
 130
 131 M: ensure-not-parser parse ( input parser -- list )
 132     2dup test>> parse nil?
 133     [ drop t swap <parse-results> ] [ 2drop nil ] if ;
 134
 135 TUPLE: and-parser parsers ;
 136
 137 : <&> ( parser1 parser2 -- parser )
 138     over and-parser? [
 139         [ parsers>> ] dip suffix
 140     ] [
 141         2array
 142     ] if and-parser boa ;
 143
 144 : <and-parser> ( parsers -- parser )
 145     dup length 1 = [ first ] [ and-parser boa ] if ;
 146
 147 : and-parser-parse ( list p1  -- list )
 148     swap [
 149         dup unparsed>> rot parse
 150         [
 151             [ parsed>> ] dip
 152             [ parsed>> 2array ] keep
 153             unparsed>> <parse-result>
 154         ] with lazy-map
 155     ] with lazy-map lconcat ;
 156
 157 M: and-parser parse ( input parser -- list )
 158     ! Parse 'input' by sequentially combining the
 159     ! two parsers. First parser1 is applied to the
 160     ! input then parser2 is applied to the rest of
 161     ! the input strings from the first parser.
 162     parsers>> unclip swapd parse
 163     [ [ and-parser-parse ] reduce ] 2curry <promise> ;
 164
 165 TUPLE: or-parser parsers ;
 166
 167 : <or-parser> ( parsers -- parser )
 168     dup length 1 = [ first ] [ or-parser boa ] if ;
 169
 170 : <|> ( parser1 parser2 -- parser )
 171     2array <or-parser> ;
 172
 173 M: or-parser parse ( input parser1 -- list )
 174     ! Return the combined list resulting from the parses
 175     ! of parser1 and parser2 being applied to the same
 176     ! input. This implements the choice parsing operator.
 177     parsers>> sequence>list
 178     [ parse ] with lazy-map lconcat ;
 179
 180 : trim-head-slice ( string -- string )
 181     ! Return a new string without any leading whitespace
 182     ! from the original string.
 183     dup empty? [
 184         dup first blank? [ rest-slice trim-head-slice ] when
 185     ] unless ;
 186
 187 TUPLE: sp-parser p1 ;
 188
 189 #! Return a parser that first skips all whitespace before
 190 #! calling the original parser.
 191 C: sp sp-parser
 192
 193 M: sp-parser parse ( input parser -- list )
 194     ! Skip all leading whitespace from the input then call
 195     ! the parser on the remaining input.
 196     [ trim-head-slice ] dip p1>> parse ;
 197
 198 TUPLE: just-parser p1 ;
 199
 200 C: just just-parser
 201
 202 M: just-parser parse ( input parser -- result )
 203     ! Calls the given parser on the input removes
 204     ! from the results anything where the remaining
 205     ! input to be parsed is not empty. So ensures a
 206     ! fully parsed input string.
 207     p1>> parse [ unparsed>> empty? ] lfilter ;
 208
 209 TUPLE: apply-parser p1 quot ;
 210
 211 C: <@ apply-parser
 212
 213 M: apply-parser parse ( input parser -- result )
 214     ! Calls the parser on the input. For each successful
 215     ! parse the quot is call with the parse result on the stack.
 216     ! The result of that quotation then becomes the new parse result.
 217     ! This allows modification of parse tree results (like
 218     ! converting strings to integers, etc).
 219     [ p1>> ] [ quot>> ] bi
 220     -rot parse [
 221         [ parsed>> swap call ] keep
 222         unparsed>> <parse-result>
 223     ] with lazy-map ;
 224
 225 TUPLE: some-parser p1 ;
 226
 227 C: some some-parser
 228
 229 M: some-parser parse ( input parser -- result )
 230     ! Calls the parser on the input, guarantees
 231     ! the parse is complete (the remaining input is empty),
 232     ! picks the first solution and only returns the parse
 233     ! tree since the remaining input is empty.
 234     p1>> just parse-1 ;
 235
 236 : <& ( parser1 parser2 -- parser )
 237     ! Same as <&> except discard the results of the second parser.
 238     <&> [ first ] <@ ;
 239
 240 : &> ( parser1 parser2 -- parser )
 241     ! Same as <&> except discard the results of the first parser.
 242     <&> [ second ] <@ ;
 243
 244 : <:&> ( parser1 parser2 -- result )
 245     ! Same as <&> except flatten the result.
 246     <&> [ first2 suffix ] <@ ;
 247
 248 : <&:> ( parser1 parser2 -- result )
 249     ! Same as <&> except flatten the result.
 250     <&> [ first2 swap prefix ] <@ ;
 251
 252 : <:&:> ( parser1 parser2 -- result )
 253     ! Same as <&> except flatten the result.
 254     <&> [ first2 append ] <@ ;
 255
 256 LAZY: <*> ( parser -- parser )
 257     dup <*> <&:> { } succeed <|> ;
 258
 259 : <+> ( parser -- parser )
 260     ! Return a parser that accepts one or more occurences of the original
 261     ! parser.
 262     dup <*> <&:> ;
 263
 264 LAZY: <?> ( parser -- parser )
 265     ! Return a parser that optionally uses the parser
 266     ! if that parser would be successful.
 267     [ 1array ] <@ f succeed <|> ;
 268
 269 TUPLE: only-first-parser p1 ;
 270
 271 LAZY: only-first ( parser -- parser )
 272     only-first-parser boa ;
 273
 274 M: only-first-parser parse ( input parser -- list )
 275     ! Transform a parser into a parser that only yields
 276     ! the first possibility.
 277     p1>> parse 1 swap ltake ;
 278
 279 LAZY: <!*> ( parser -- parser )
 280     ! Like <*> but only return one possible result
 281     ! containing all matching parses. Does not return
 282     ! partial matches. Useful for efficiency since that's
 283     ! usually the effect you want and cuts down on backtracking
 284     ! required.
 285     <*> only-first ;
 286
 287 LAZY: <!+> ( parser -- parser )
 288     ! Like <+> but only return one possible result
 289     ! containing all matching parses. Does not return
 290     ! partial matches. Useful for efficiency since that's
 291     ! usually the effect you want and cuts down on backtracking
 292     ! required.
 293     <+> only-first ;
 294
 295 LAZY: <!?> ( parser -- parser )
 296     ! Like <?> but only return one possible result
 297     ! containing all matching parses. Does not return
 298     ! partial matches. Useful for efficiency since that's
 299     ! usually the effect you want and cuts down on backtracking
 300     ! required.
 301     <?> only-first ;
 302
 303 LAZY: <(?)> ( parser -- parser )
 304     ! Like <?> but take shortest match first.
 305     f succeed swap [ 1array ] <@ <|> ;
 306
 307 LAZY: <(*)> ( parser -- parser )
 308     ! Like <*> but take shortest match first.
 309     ! Implementation by Matthew Willis.
 310     { } succeed swap dup <(*)> <&:> <|> ;
 311
 312 LAZY: <(+)> ( parser -- parser )
 313     ! Like <+> but take shortest match first.
 314     ! Implementation by Matthew Willis.
 315     dup <(*)> <&:> ;
 316
 317 : pack ( close body open -- parser )
 318     ! Parse a construct enclosed by two symbols,
 319     ! given a parser for the opening symbol, the
 320     ! closing symbol, and the body.
 321     <& &> ;
 322
 323 : nonempty-list-of ( items separator -- parser )
 324     [ over &> <*> <&:> ] keep <?> [ nip ] 2keep pack ;
 325
 326 : list-of ( items separator -- parser )
 327     ! Given a parser for the separator and for the
 328     ! items themselves, return a parser that parses
 329     ! lists of those items. The parse tree is an
 330     ! array of the parsed items.
 331     nonempty-list-of { } succeed <|> ;
 332
 333 LAZY: surrounded-by ( parser start end -- parser' )
 334     [ token ] bi@ swapd pack ;
 335
 336 : exactly-n ( parser n -- parser' )
 337     swap <repetition> <and-parser> [ flatten ] <@ ;
 338
 339 : at-most-n ( parser n -- parser' )
 340     dup zero? [
 341         2drop epsilon
 342     ] [
 343         2dup exactly-n
 344         -rot 1 - at-most-n <|>
 345     ] if ;
 346
 347 : at-least-n ( parser n -- parser' )
 348     dupd exactly-n swap <*> <&> ;
 349
 350 : from-m-to-n ( parser m n -- parser' )
 351     [ [ exactly-n ] 2keep ] dip swap - at-most-n <:&:> ;