contrib/parser-combinators/parser-combinators.factor

   1 ! Copyright (C) 2004 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 !
   4 USING: lazy-lists kernel sequences strings math io arrays errors namespaces ;
   5 IN: parser-combinators
   6
   7 ! Parser combinator protocol
   8 GENERIC: (parse) ( input parser -- list )
   9
  10 : parse ( input parser -- promise )
  11   [ (parse) ] curry curry <promise> ;
  12
  13 TUPLE: parse-result parsed unparsed ;
  14
  15 : ?head-slice ( seq begin -- newseq ? )
  16   2dup head? [ length tail-slice t ] [ drop f ] if ;
  17
  18 : unclip-slice ( seq -- rest first )
  19   dup 1 tail-slice swap first ;
  20
  21 : h:t ( object -- head tail )
  22   #! Return the head and tail of the object.
  23   dup empty? [ dup first swap 1 tail ] unless ;
  24
  25 TUPLE: token-parser string ;
  26
  27 : token ( string -- parser )
  28   <token-parser> ;
  29
  30 M: token-parser (parse) ( input parser -- list )
  31   token-parser-string swap over ?head-slice [
  32     <parse-result> 1list
  33   ] [
  34     2drop nil
  35   ] if ;
  36
  37 TUPLE: satisfy-parser quot ;
  38
  39 : satisfy ( quot -- parser )
  40   <satisfy-parser> ;
  41
  42 M: satisfy-parser (parse) ( input parser -- list )
  43   #! A parser that succeeds if the predicate,
  44   #! when passed the first character in the input, returns
  45   #! true.
  46   satisfy-parser-quot >r unclip-slice dup r> call [
  47     swap <parse-result> 1list
  48   ] [
  49     2drop nil
  50   ] if ;
  51
  52 : satisfy2-parser ( inp pred quot -- llist )
  53   #! A parser that succeeds if the predicate,
  54   #! when passed the first character in the input, returns
  55   #! true. On success the quotation is called with the
  56   #! successfully parsed character on the stack. The result
  57   #! of that call is returned as the result portion of the
  58   #! successfull parse lazy list.
  59   -rot over first swap call [
  60     h:t >r swap call r> <parse-result> 1list
  61   ] [
  62     2drop nil
  63   ] if ;
  64
  65   : satisfy2 ( pred quot -- parser )
  66   #! Return a satisfy2-parser.
  67   [ satisfy2-parser ] curry curry ;
  68
  69 : epsilon-parser ( input -- llist )
  70   #! A parser that parses the empty string. It
  71   #! does not consume any input and always returns
  72   #! an empty list as the parse tree with the
  73   #! unmodified input.
  74   "" swap <parse-result> 1list ;
  75
  76 : epsilon ( -- parser )
  77   #! Return an epsilon parser
  78   [ epsilon-parser ] ;
  79
  80 : succeed-parser ( input result -- llist )
  81   #! A parser that always returns 'result' as a
  82   #! successful parse with no input consumed.
  83   swap <parse-result> 1list ;
  84
  85 : succeed ( result -- parser )
  86   #! Return a succeed parser.
  87   [ succeed-parser ] curry ;
  88
  89 : fail-parser ( input -- llist )
  90   #! A parser that always fails and returns
  91   #! an empty list of successes.
  92   drop nil ;
  93
  94 : fail ( -- parser )
  95   #! Return a fail-parser.
  96   [ fail-parser ] ;
  97
  98 : <&>-parser ( input parser1 parser2 -- parser )
  99   #! Parse 'input' by sequentially combining the
 100   #! two parsers. First parser1 is applied to the
 101   #! input then parser2 is applied to the rest of
 102   #! the input strings from the first parser.
 103   -rot call [
 104     dup parse-result-unparsed rot call
 105     [
 106       >r parse-result-parsed r>
 107       [ parse-result-parsed 2array ] keep
 108       parse-result-unparsed <parse-result>
 109     ] lmap-with
 110   ] lmap-with lconcat ;
 111
 112 : <&> ( parser1 parser2 -- parser )
 113   #! Sequentially combine two parsers, returning a parser
 114   #! that first calls p1, then p2 all remaining results from
 115   #! p1.
 116   [ <&>-parser ] curry curry ;
 117
 118 : <|>-parser ( input parser1 parser2 -- result )
 119   #! Return the combined list resulting from the parses
 120   #! of parser1 and parser2 being applied to the same
 121   #! input. This implements the choice parsing operator.
 122   >r dupd call swap r> call lappend ;
 123
 124 : <|> ( p1 p2 -- parser )
 125   #! Choice operator for parsers. Return a parser that does
 126   #! p1 or p2 depending on which will succeed.
 127   [ <|>-parser ] curry curry ;
 128
 129 : string-ltrim ( string -- string )
 130   #! Return a new string without any leading whitespace
 131   #! from the original string.
 132   dup first blank? [ 1 tail string-ltrim ] when ;
 133
 134 : sp-parser ( input parser -- result )
 135   #! Skip all leading whitespace from the input then call
 136   #! the parser on the remaining input.
 137   >r string-ltrim r> call ;
 138
 139 : sp ( parser -- parser )
 140   #! Return a parser that first skips all whitespace before
 141   #! calling the original parser.
 142   [ sp-parser ] curry ;
 143
 144 : just-parser ( input parser -- result )
 145   #! Calls the given parser on the input removes
 146   #! from the results anything where the remaining
 147   #! input to be parsed is not empty. So ensures a
 148   #! fully parsed input string.
 149   call [ parse-result-unparsed empty? ] lsubset ;
 150
 151 : just ( parser -- parser )
 152   #! Return an instance of the just-parser.
 153   [ just-parser ] curry ;
 154
 155 : <@-parser ( input parser quot -- result )
 156   #! Calls the parser on the input. For each successfull
 157   #! parse the quot is call with the parse result on the stack.
 158   #! The result of that quotation then becomes the new parse result.
 159   #! This allows modification of parse tree results (like
 160   #! converting strings to integers, etc).
 161   -rot call [
 162     [ parse-result-parsed swap call ] keep
 163     parse-result-unparsed <parse-result>
 164   ] lmap-with ;
 165
 166 : <@ ( parser quot -- parser )
 167   #! Return an <@-parser.
 168   [ <@-parser ] curry curry ;
 169
 170 : some-parser ( input parser -- result )
 171   #! Calls the parser on the input, guarantees
 172   #! the parse is complete (the remaining input is empty),
 173   #! picks the first solution and only returns the parse
 174   #! tree since the remaining input is empty.
 175   just call car parse-result-parsed ;
 176
 177 : some ( parser -- deterministic-parser )
 178   #! Creates a 'some-parser'.
 179   [ some-parser ] curry ;
 180
 181 : <& ( parser1 parser2 -- parser )
 182   #! Same as <&> except discard the results of the second parser.
 183   <&> [ first ] <@ ;
 184
 185 : &> ( parser1 parser2 -- parser )
 186   #! Same as <&> except discard the results of the first parser.
 187   <&> [ second ] <@ ;
 188
 189 : <:&>-parser ( input parser1 parser2 -- result )
 190   #! Same as <&> except flatten the result.
 191   <&> [ dup second swap first [ % , ] { } make ] <@ call ;
 192
 193 : <:&> ( parser1 parser2 -- parser )
 194   #! Same as <&> except flatten the result.
 195   [ <:&>-parser ] curry curry ;
 196
 197 : <&:>-parser ( input parser1 parser2 -- result )
 198   #! Same as <&> except flatten the result.
 199   <&> [ dup second swap first [ , % ] { } make ] <@ call ;
 200
 201 : <&:> ( parser1 parser2 -- parser )
 202   #! Same as <&> except flatten the result.
 203   [ <&:>-parser ] curry curry ;
 204
 205 DEFER: <*>
 206
 207 : (<*>) ( parser -- parser )
 208   #! Non-delayed implementation of <*>
 209   dup <*> <&:> [ ] succeed <|> ;
 210
 211 : <*> ( parser -- parser )
 212   #! Return a parser that accepts zero or more occurences of the original
 213   #! parser.
 214   [  (<*>) call ] curry ;
 215
 216 : (<+>) ( parser -- parser )
 217   #! Non-delayed implementation of <+>
 218   dup <*> <&:> ;
 219
 220 : <+> ( parser -- parser )
 221   #! Return a parser that accepts one or more occurences of the original
 222   #! parser.
 223   [  (<+>) call ] curry ;
 224
 225 : (<?>) ( parser -- parser )
 226   #! Non-delayed implementation of <?>
 227   [ unit ] <@ f succeed <|> ;
 228
 229 : <?> ( parser -- parser )
 230   #! Return a parser that optionally uses the parser
 231   #! if that parser would be successfull.
 232   [  (<?>) call ] curry ;