extra/parser-combinators/parser-combinators.factor

   1 ! Copyright (C) 2004 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: lazy-lists promises kernel sequences strings math
   4 arrays splitting quotations combinators namespaces
   5 unicode.case unicode.categories sequences.deep ;
   6 IN: parser-combinators
   7
   8 ! Parser combinator protocol
   9 GENERIC: parse ( input parser -- list )
  10
  11 M: promise parse ( input parser -- list )
  12     force parse ;
  13
  14 TUPLE: parse-result parsed unparsed ;
  15
  16 : parse-1 ( input parser -- result )
  17     dupd parse dup nil? [
  18         "Cannot parse " rot append throw
  19     ] [
  20         nip car parse-result-parsed
  21     ] if ;
  22
  23 C: <parse-result> parse-result
  24
  25 : <parse-results> ( parsed unparsed -- list )
  26     <parse-result> 1list ;
  27
  28 : parse-result-parsed-slice ( parse-result -- slice )
  29     dup parse-result-parsed empty? [
  30         parse-result-unparsed 0 0 rot <slice>
  31     ] [
  32         dup parse-result-unparsed
  33         dup slice-from [ rot parse-result-parsed length - ] keep
  34         rot slice-seq <slice>
  35     ] if ;
  36
  37 : string= ( str1 str2 ignore-case -- ? )
  38     [ [ >upper ] bi@ ] when sequence= ;
  39
  40 : string-head? ( str head ignore-case -- ? )
  41     2over shorter? [
  42         3drop f
  43     ] [
  44         >r [ length head-slice ] keep r> string=
  45     ] if ;
  46
  47 : ?string-head ( str head ignore-case -- newstr ? )
  48     >r 2dup r> string-head?
  49     [ length tail-slice t ] [ drop f ] if ;
  50
  51 TUPLE: token-parser string ignore-case? ;
  52
  53 C: <token-parser> token-parser
  54
  55 : token ( string -- parser ) f <token-parser> ;
  56
  57 : case-insensitive-token ( string -- parser ) t <token-parser> ;
  58
  59 M: token-parser parse ( input parser -- list )
  60     dup token-parser-string swap token-parser-ignore-case?
  61     >r tuck r> ?string-head
  62     [ <parse-results> ] [ 2drop nil ] if ;
  63
  64 : 1token ( n -- parser ) 1string token ;
  65
  66 TUPLE: satisfy-parser quot ;
  67
  68 C: satisfy satisfy-parser ( quot -- parser )
  69
  70 M: satisfy-parser parse ( input parser -- list )
  71     #! A parser that succeeds if the predicate,
  72     #! when passed the first character in the input, returns
  73     #! true.
  74     over empty? [
  75         2drop nil
  76     ] [
  77         satisfy-parser-quot >r unclip-slice dup r> call
  78         [ swap <parse-results> ] [ 2drop nil ] if
  79     ] if ;
  80
  81 LAZY: any-char-parser ( -- parser )
  82     [ drop t ] satisfy ;
  83
  84 TUPLE: epsilon-parser ;
  85
  86 C: epsilon epsilon-parser ( -- parser )
  87
  88 M: epsilon-parser parse ( input parser -- list )
  89     #! A parser that parses the empty string. It
  90     #! does not consume any input and always returns
  91     #! an empty list as the parse tree with the
  92     #! unmodified input.
  93     drop "" swap <parse-results> ;
  94
  95 TUPLE: succeed-parser result ;
  96
  97 C: succeed succeed-parser ( result -- parser )
  98
  99 M: succeed-parser parse ( input parser -- list )
 100     #! A parser that always returns 'result' as a
 101     #! successful parse with no input consumed.
 102     succeed-parser-result swap <parse-results> ;
 103
 104 TUPLE: fail-parser ;
 105
 106 C: fail fail-parser ( -- parser )
 107
 108 M: fail-parser parse ( input parser -- list )
 109     #! A parser that always fails and returns
 110     #! an empty list of successes.
 111     2drop nil ;
 112
 113 TUPLE: ensure-parser test ;
 114
 115 : ensure ( parser -- ensure )
 116     ensure-parser boa ;
 117
 118 M: ensure-parser parse ( input parser -- list )
 119     2dup ensure-parser-test parse nil?
 120     [ 2drop nil ] [ drop t swap <parse-results> ] if ;
 121
 122 TUPLE: ensure-not-parser test ;
 123
 124 : ensure-not ( parser -- ensure )
 125     ensure-not-parser boa ;
 126
 127 M: ensure-not-parser parse ( input parser -- list )
 128     2dup ensure-not-parser-test parse nil?
 129     [ drop t swap <parse-results> ] [ 2drop nil ] if ;
 130
 131 TUPLE: and-parser parsers ;
 132
 133 : <&> ( parser1 parser2 -- parser )
 134     over and-parser? [
 135         >r and-parser-parsers r> suffix
 136     ] [
 137         2array
 138     ] if and-parser boa ;
 139
 140 : <and-parser> ( parsers -- parser )
 141     dup length 1 = [ first ] [ and-parser boa ] if ;
 142
 143 : and-parser-parse ( list p1  -- list )
 144     swap [
 145         dup parse-result-unparsed rot parse
 146         [
 147             >r parse-result-parsed r>
 148             [ parse-result-parsed 2array ] keep
 149             parse-result-unparsed <parse-result>
 150         ] lmap-with
 151     ] lmap-with lconcat ;
 152
 153 M: and-parser parse ( input parser -- list )
 154     #! Parse 'input' by sequentially combining the
 155     #! two parsers. First parser1 is applied to the
 156     #! input then parser2 is applied to the rest of
 157     #! the input strings from the first parser.
 158     and-parser-parsers unclip swapd parse
 159     [ [ and-parser-parse ] reduce ] 2curry promise ;
 160
 161 TUPLE: or-parser parsers ;
 162
 163 : <or-parser> ( parsers -- parser )
 164     dup length 1 = [ first ] [ or-parser boa ] if ;
 165
 166 : <|> ( parser1 parser2 -- parser )
 167     2array <or-parser> ;
 168
 169 M: or-parser parse ( input parser1 -- list )
 170     #! Return the combined list resulting from the parses
 171     #! of parser1 and parser2 being applied to the same
 172     #! input. This implements the choice parsing operator.
 173     or-parser-parsers 0 swap seq>list
 174     [ parse ] lmap-with lconcat ;
 175
 176 : left-trim-slice ( string -- string )
 177     #! Return a new string without any leading whitespace
 178     #! from the original string.
 179     dup empty? [
 180         dup first blank? [ rest-slice left-trim-slice ] when
 181     ] unless ;
 182
 183 TUPLE: sp-parser p1 ;
 184
 185 #! Return a parser that first skips all whitespace before
 186 #! calling the original parser.
 187 C: sp sp-parser ( p1 -- parser )
 188
 189 M: sp-parser parse ( input parser -- list )
 190     #! Skip all leading whitespace from the input then call
 191     #! the parser on the remaining input.
 192     >r left-trim-slice r> sp-parser-p1 parse ;
 193
 194 TUPLE: just-parser p1 ;
 195
 196 C: just just-parser ( p1 -- parser )
 197
 198 M: just-parser parse ( input parser -- result )
 199     #! Calls the given parser on the input removes
 200     #! from the results anything where the remaining
 201     #! input to be parsed is not empty. So ensures a
 202     #! fully parsed input string.
 203     just-parser-p1 parse [ parse-result-unparsed empty? ] lfilter ;
 204
 205 TUPLE: apply-parser p1 quot ;
 206
 207 C: <@ apply-parser ( parser quot -- parser )
 208
 209 M: apply-parser parse ( input parser -- result )
 210     #! Calls the parser on the input. For each successful
 211     #! parse the quot is call with the parse result on the stack.
 212     #! The result of that quotation then becomes the new parse result.
 213     #! This allows modification of parse tree results (like
 214     #! converting strings to integers, etc).
 215     [ apply-parser-p1 ] keep apply-parser-quot
 216     -rot parse [
 217         [ parse-result-parsed swap call ] keep
 218         parse-result-unparsed <parse-result>
 219     ] lmap-with ;
 220
 221 TUPLE: some-parser p1 ;
 222
 223 C: some some-parser ( p1 -- parser )
 224
 225 M: some-parser parse ( input parser -- result )
 226     #! Calls the parser on the input, guarantees
 227     #! the parse is complete (the remaining input is empty),
 228     #! picks the first solution and only returns the parse
 229     #! tree since the remaining input is empty.
 230     some-parser-p1 just parse-1 ;
 231
 232 : <& ( parser1 parser2 -- parser )
 233     #! Same as <&> except discard the results of the second parser.
 234     <&> [ first ] <@ ;
 235
 236 : &> ( parser1 parser2 -- parser )
 237     #! Same as <&> except discard the results of the first parser.
 238     <&> [ second ] <@ ;
 239
 240 : <:&> ( parser1 parser2 -- result )
 241     #! Same as <&> except flatten the result.
 242     <&> [ first2 suffix ] <@ ;
 243
 244 : <&:> ( parser1 parser2 -- result )
 245     #! Same as <&> except flatten the result.
 246     <&> [ first2 swap prefix ] <@ ;
 247
 248 : <:&:> ( parser1 parser2 -- result )
 249     #! Same as <&> except flatten the result.
 250     <&> [ first2 append ] <@ ;
 251
 252 LAZY: <*> ( parser -- parser )
 253     dup <*> <&:> { } succeed <|> ;
 254
 255 : <+> ( parser -- parser )
 256     #! Return a parser that accepts one or more occurences of the original
 257     #! parser.
 258     dup <*> <&:> ;
 259
 260 LAZY: <?> ( parser -- parser )
 261     #! Return a parser that optionally uses the parser
 262     #! if that parser would be successful.
 263     [ 1array ] <@ f succeed <|> ;
 264
 265 TUPLE: only-first-parser p1 ;
 266
 267 LAZY: only-first ( parser -- parser )
 268     only-first-parser boa ;
 269
 270 M: only-first-parser parse ( input parser -- list )
 271     #! Transform a parser into a parser that only yields
 272     #! the first possibility.
 273     only-first-parser-p1 parse 1 swap ltake ;
 274
 275 LAZY: <!*> ( parser -- parser )
 276     #! Like <*> but only return one possible result
 277     #! containing all matching parses. Does not return
 278     #! partial matches. Useful for efficiency since that's
 279     #! usually the effect you want and cuts down on backtracking
 280     #! required.
 281     <*> only-first ;
 282
 283 LAZY: <!+> ( parser -- parser )
 284     #! Like <+> but only return one possible result
 285     #! containing all matching parses. Does not return
 286     #! partial matches. Useful for efficiency since that's
 287     #! usually the effect you want and cuts down on backtracking
 288     #! required.
 289     <+> only-first ;
 290
 291 LAZY: <!?> ( parser -- parser )
 292     #! Like <?> but only return one possible result
 293     #! containing all matching parses. Does not return
 294     #! partial matches. Useful for efficiency since that's
 295     #! usually the effect you want and cuts down on backtracking
 296     #! required.
 297     <?> only-first ;
 298
 299 LAZY: <(?)> ( parser -- parser )
 300     #! Like <?> but take shortest match first.
 301     f succeed swap [ 1array ] <@ <|> ;
 302
 303 LAZY: <(*)> ( parser -- parser )
 304     #! Like <*> but take shortest match first.
 305     #! Implementation by Matthew Willis.
 306     { } succeed swap dup <(*)> <&:> <|> ;
 307
 308 LAZY: <(+)> ( parser -- parser )
 309     #! Like <+> but take shortest match first.
 310     #! Implementation by Matthew Willis.
 311     dup <(*)> <&:> ;
 312
 313 : pack ( close body open -- parser )
 314     #! Parse a construct enclosed by two symbols,
 315     #! given a parser for the opening symbol, the
 316     #! closing symbol, and the body.
 317     <& &> ;
 318
 319 : nonempty-list-of ( items separator -- parser )
 320     [ over &> <*> <&:> ] keep <?> tuck pack ;
 321
 322 : list-of ( items separator -- parser )
 323     #! Given a parser for the separator and for the
 324     #! items themselves, return a parser that parses
 325     #! lists of those items. The parse tree is an
 326     #! array of the parsed items.
 327     nonempty-list-of { } succeed <|> ;
 328
 329 LAZY: surrounded-by ( parser start end -- parser' )
 330     [ token ] bi@ swapd pack ;
 331
 332 : exactly-n ( parser n -- parser' )
 333     swap <repetition> <and-parser> [ flatten ] <@ ;
 334
 335 : at-most-n ( parser n -- parser' )
 336     dup zero? [
 337         2drop epsilon
 338     ] [
 339         2dup exactly-n
 340         -rot 1- at-most-n <|>
 341     ] if ;
 342
 343 : at-least-n ( parser n -- parser' )
 344     dupd exactly-n swap <*> <&> ;
 345
 346 : from-m-to-n ( parser m n -- parser' )
 347     >r [ exactly-n ] 2keep r> swap - at-most-n <:&:> ;