extra/parser-combinators/parser-combinators.factor

   1 ! Copyright (C) 2004 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: accessors arrays kernel lists lists.lazy math promises
   4 sequences sequences.deep strings unicode ;
   5 IN: parser-combinators
   6
   7 ! Parser combinator protocol
   8 GENERIC: parse ( input parser -- list )
   9
  10 M: promise parse ( input parser -- list )
  11     force parse ;
  12
  13 TUPLE: parse-result parsed unparsed ;
  14
  15 ERROR: cannot-parse input ;
  16
  17 : parse-1 ( input parser -- result )
  18     dupd parse dup nil? [
  19         swap cannot-parse
  20     ] [
  21         nip car parsed>>
  22     ] if ;
  23
  24 C: <parse-result> parse-result
  25
  26 : <parse-results> ( parsed unparsed -- list )
  27     <parse-result> 1list ;
  28
  29 : parse-result-parsed-slice ( parse-result -- slice )
  30     dup parsed>> empty? [
  31         unparsed>> 0 0 rot <slice>
  32     ] [
  33         dup unparsed>>
  34         dup from>> [ rot parsed>> length - ] keep
  35         rot seq>> <slice>
  36     ] if ;
  37
  38 : string= ( str1 str2 ignore-case -- ? )
  39     [ [ >upper ] bi@ ] when sequence= ;
  40
  41 : string-head? ( str head ignore-case -- ? )
  42     2over shorter? [
  43         3drop f
  44     ] [
  45         [ [ length head-slice ] keep ] dip string=
  46     ] if ;
  47
  48 : ?string-head ( str head ignore-case -- newstr ? )
  49     [ 2dup ] dip string-head?
  50     [ length tail-slice t ] [ drop f ] if ;
  51
  52 TUPLE: token-parser string ignore-case? ;
  53
  54 C: <token-parser> token-parser
  55
  56 : token ( string -- parser ) f <token-parser> ;
  57
  58 : case-insensitive-token ( string -- parser ) t <token-parser> ;
  59
  60 M:: token-parser parse ( input parser -- list )
  61     parser string>> :> str
  62     parser ignore-case?>> :> case?
  63
  64     str input str case? ?string-head
  65     [ <parse-results> ] [ 2drop nil ] if ;
  66
  67 : 1token ( n -- parser ) 1string token ;
  68
  69 TUPLE: satisfy-parser quot ;
  70
  71 C: satisfy satisfy-parser
  72
  73 M: satisfy-parser parse ( input parser -- list )
  74     ! A parser that succeeds if the predicate,
  75     ! when passed the first character in the input, returns
  76     ! true.
  77     over empty? [
  78         2drop nil
  79     ] [
  80         quot>> [ unclip-slice dup ] dip call( char -- ? )
  81         [ swap <parse-results> ] [ 2drop nil ] if
  82     ] if ;
  83
  84 LAZY: any-char-parser ( -- parser )
  85     [ drop t ] satisfy ;
  86
  87 TUPLE: epsilon-parser ;
  88
  89 C: epsilon epsilon-parser
  90
  91 M: epsilon-parser parse ( input parser -- list )
  92     ! A parser that parses the empty string. It
  93     ! does not consume any input and always returns
  94     ! an empty list as the parse tree with the
  95     ! unmodified input.
  96     drop "" swap <parse-results> ;
  97
  98 TUPLE: succeed-parser result ;
  99
 100 C: succeed succeed-parser
 101
 102 M: succeed-parser parse ( input parser -- list )
 103     ! A parser that always returns 'result' as a
 104     ! successful parse with no input consumed.
 105     result>> swap <parse-results> ;
 106
 107 TUPLE: fail-parser ;
 108
 109 C: fail fail-parser
 110
 111 M: fail-parser parse ( input parser -- list )
 112     ! A parser that always fails and returns
 113     ! an empty list of successes.
 114     2drop nil ;
 115
 116 TUPLE: ensure-parser test ;
 117
 118 : ensure ( parser -- ensure )
 119     ensure-parser boa ;
 120
 121 M: ensure-parser parse ( input parser -- list )
 122     2dup test>> parse nil?
 123     [ 2drop nil ] [ drop t swap <parse-results> ] if ;
 124
 125 TUPLE: ensure-not-parser test ;
 126
 127 : ensure-not ( parser -- ensure )
 128     ensure-not-parser boa ;
 129
 130 M: ensure-not-parser parse ( input parser -- list )
 131     2dup test>> parse nil?
 132     [ drop t swap <parse-results> ] [ 2drop nil ] if ;
 133
 134 TUPLE: and-parser parsers ;
 135
 136 : <&> ( parser1 parser2 -- parser )
 137     over and-parser? [
 138         [ parsers>> ] dip suffix
 139     ] [
 140         2array
 141     ] if and-parser boa ;
 142
 143 : <and-parser> ( parsers -- parser )
 144     dup length 1 = [ first ] [ and-parser boa ] if ;
 145
 146 : and-parser-parse ( list p1  -- list )
 147     swap [
 148         dup unparsed>> rot parse
 149         [
 150             [ parsed>> ] dip
 151             [ parsed>> 2array ] keep
 152             unparsed>> <parse-result>
 153         ] with lmap-lazy
 154     ] with lmap-lazy lconcat ;
 155
 156 M: and-parser parse ( input parser -- list )
 157     ! Parse 'input' by sequentially combining the
 158     ! two parsers. First parser1 is applied to the
 159     ! input then parser2 is applied to the rest of
 160     ! the input strings from the first parser.
 161     parsers>> unclip swapd parse
 162     [ [ and-parser-parse ] reduce ] 2curry <promise> ;
 163
 164 TUPLE: or-parser parsers ;
 165
 166 : <or-parser> ( parsers -- parser )
 167     dup length 1 = [ first ] [ or-parser boa ] if ;
 168
 169 : <|> ( parser1 parser2 -- parser )
 170     2array <or-parser> ;
 171
 172 M: or-parser parse ( input parser1 -- list )
 173     ! Return the combined list resulting from the parses
 174     ! of parser1 and parser2 being applied to the same
 175     ! input. This implements the choice parsing operator.
 176     parsers>> sequence>list
 177     [ parse ] with lmap-lazy lconcat ;
 178
 179 : trim-head-slice ( string -- string )
 180     ! Return a new string without any leading whitespace
 181     ! from the original string.
 182     dup empty? [
 183         dup first blank? [ rest-slice trim-head-slice ] when
 184     ] unless ;
 185
 186 TUPLE: sp-parser p1 ;
 187
 188 ! Return a parser that first skips all whitespace before
 189 ! calling the original parser.
 190 C: sp sp-parser
 191
 192 M: sp-parser parse ( input parser -- list )
 193     ! Skip all leading whitespace from the input then call
 194     ! the parser on the remaining input.
 195     [ trim-head-slice ] dip p1>> parse ;
 196
 197 TUPLE: just-parser p1 ;
 198
 199 C: just just-parser
 200
 201 M: just-parser parse ( input parser -- result )
 202     ! Calls the given parser on the input removes
 203     ! from the results anything where the remaining
 204     ! input to be parsed is not empty. So ensures a
 205     ! fully parsed input string.
 206     p1>> parse [ unparsed>> empty? ] lfilter ;
 207
 208 TUPLE: apply-parser p1 quot ;
 209
 210 C: <@ apply-parser
 211
 212 M: apply-parser parse ( input parser -- result )
 213     ! Calls the parser on the input. For each successful
 214     ! parse the quot is call with the parse result on the stack.
 215     ! The result of that quotation then becomes the new parse result.
 216     ! This allows modification of parse tree results (like
 217     ! converting strings to integers, etc).
 218     [ p1>> ] [ quot>> ] bi
 219     -rot parse [
 220         [ parsed>> swap call ] keep
 221         unparsed>> <parse-result>
 222     ] with lmap-lazy ;
 223
 224 TUPLE: some-parser p1 ;
 225
 226 C: some some-parser
 227
 228 M: some-parser parse ( input parser -- result )
 229     ! Calls the parser on the input, guarantees
 230     ! the parse is complete (the remaining input is empty),
 231     ! picks the first solution and only returns the parse
 232     ! tree since the remaining input is empty.
 233     p1>> just parse-1 ;
 234
 235 : <& ( parser1 parser2 -- parser )
 236     ! Same as <&> except discard the results of the second parser.
 237     <&> [ first ] <@ ;
 238
 239 : &> ( parser1 parser2 -- parser )
 240     ! Same as <&> except discard the results of the first parser.
 241     <&> [ second ] <@ ;
 242
 243 : <:&> ( parser1 parser2 -- result )
 244     ! Same as <&> except flatten the result.
 245     <&> [ first2 suffix ] <@ ;
 246
 247 : <&:> ( parser1 parser2 -- result )
 248     ! Same as <&> except flatten the result.
 249     <&> [ first2 swap prefix ] <@ ;
 250
 251 : <:&:> ( parser1 parser2 -- result )
 252     ! Same as <&> except flatten the result.
 253     <&> [ first2 append ] <@ ;
 254
 255 LAZY: <*> ( parser -- parser )
 256     dup <*> <&:> { } succeed <|> ;
 257
 258 : <+> ( parser -- parser )
 259     ! Return a parser that accepts one or more occurrences of the original
 260     ! parser.
 261     dup <*> <&:> ;
 262
 263 LAZY: <?> ( parser -- parser )
 264     ! Return a parser that optionally uses the parser
 265     ! if that parser would be successful.
 266     [ 1array ] <@ f succeed <|> ;
 267
 268 TUPLE: only-first-parser p1 ;
 269
 270 LAZY: only-first ( parser -- parser )
 271     only-first-parser boa ;
 272
 273 M: only-first-parser parse ( input parser -- list )
 274     ! Transform a parser into a parser that only yields
 275     ! the first possibility.
 276     p1>> parse 1 swap ltake ;
 277
 278 LAZY: <!*> ( parser -- parser )
 279     ! Like <*> but only return one possible result
 280     ! containing all matching parses. Does not return
 281     ! partial matches. Useful for efficiency since that's
 282     ! usually the effect you want and cuts down on backtracking
 283     ! required.
 284     <*> only-first ;
 285
 286 LAZY: <!+> ( parser -- parser )
 287     ! Like <+> but only return one possible result
 288     ! containing all matching parses. Does not return
 289     ! partial matches. Useful for efficiency since that's
 290     ! usually the effect you want and cuts down on backtracking
 291     ! required.
 292     <+> only-first ;
 293
 294 LAZY: <!?> ( parser -- parser )
 295     ! Like <?> but only return one possible result
 296     ! containing all matching parses. Does not return
 297     ! partial matches. Useful for efficiency since that's
 298     ! usually the effect you want and cuts down on backtracking
 299     ! required.
 300     <?> only-first ;
 301
 302 LAZY: <(?)> ( parser -- parser )
 303     ! Like <?> but take shortest match first.
 304     f succeed swap [ 1array ] <@ <|> ;
 305
 306 LAZY: <(*)> ( parser -- parser )
 307     ! Like <*> but take shortest match first.
 308     ! Implementation by Matthew Willis.
 309     { } succeed swap dup <(*)> <&:> <|> ;
 310
 311 LAZY: <(+)> ( parser -- parser )
 312     ! Like <+> but take shortest match first.
 313     ! Implementation by Matthew Willis.
 314     dup <(*)> <&:> ;
 315
 316 : pack ( close body open -- parser )
 317     ! Parse a construct enclosed by two symbols,
 318     ! given a parser for the opening symbol, the
 319     ! closing symbol, and the body.
 320     <& &> ;
 321
 322 : nonempty-list-of ( items separator -- parser )
 323     [ over &> <*> <&:> ] keep <?> [ nip ] 2keep pack ;
 324
 325 : list-of ( items separator -- parser )
 326     ! Given a parser for the separator and for the
 327     ! items themselves, return a parser that parses
 328     ! lists of those items. The parse tree is an
 329     ! array of the parsed items.
 330     nonempty-list-of { } succeed <|> ;
 331
 332 LAZY: surrounded-by ( parser start end -- parser' )
 333     [ token ] bi@ swapd pack ;
 334
 335 : exactly-n ( parser n -- parser' )
 336     swap <repetition> <and-parser> [ flatten ] <@ ;
 337
 338 : at-most-n ( parser n -- parser' )
 339     dup zero? [
 340         2drop epsilon
 341     ] [
 342         2dup exactly-n
 343         -rot 1 - at-most-n <|>
 344     ] if ;
 345
 346 : at-least-n ( parser n -- parser' )
 347     dupd exactly-n swap <*> <&> ;
 348
 349 : from-m-to-n ( parser m n -- parser' )
 350     [ [ exactly-n ] 2keep ] dip swap - at-most-n <:&:> ;