contrib/parser-combinators/parser-combinators.factor

   1 ! Copyright (C) 2004 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 !
   4 USING: lazy-lists kernel sequences sequences-contrib strings math io arrays errors namespaces ;
   5 IN: parser-combinators
   6
   7 ! Parser combinator protocol
   8 GENERIC: (parse) ( input parser -- list )
   9
  10 M: promise (parse) ( input parser -- list )
  11   force (parse) ;
  12
  13 LAZY: parse ( input parser -- promise )
  14   (parse) ;
  15
  16 TUPLE: parse-result parsed unparsed ;
  17 TUPLE: token-parser string ;
  18
  19 LAZY: token ( string -- parser )
  20   <token-parser> ;
  21
  22 M: token-parser (parse) ( input parser -- list )
  23   token-parser-string swap over ?head-slice [
  24     <parse-result> 1list
  25   ] [
  26     2drop nil
  27   ] if ;
  28
  29 TUPLE: satisfy-parser quot ;
  30
  31 LAZY: satisfy ( quot -- parser )
  32   <satisfy-parser> ;
  33
  34 M: satisfy-parser (parse) ( input parser -- list )
  35   #! A parser that succeeds if the predicate,
  36   #! when passed the first character in the input, returns
  37   #! true.
  38   over empty? [
  39     2drop nil
  40   ] [
  41     satisfy-parser-quot >r unclip-slice dup r> call [
  42       swap <parse-result> 1list
  43     ] [
  44       2drop nil
  45     ] if
  46   ] if ;
  47
  48 TUPLE: epsilon-parser ;
  49
  50 LAZY: epsilon ( -- parser )
  51   <epsilon-parser> ;
  52
  53 M: epsilon-parser (parse) ( input parser -- list )
  54   #! A parser that parses the empty string. It
  55   #! does not consume any input and always returns
  56   #! an empty list as the parse tree with the
  57   #! unmodified input.
  58   drop "" swap <parse-result> 1list ;
  59
  60 TUPLE: succeed-parser result ;
  61
  62 LAZY: succeed ( result -- parser )
  63   <succeed-parser> ;
  64
  65 M: succeed-parser (parse) ( input parser -- list )
  66   #! A parser that always returns 'result' as a
  67   #! successful parse with no input consumed.
  68   succeed-parser-result swap <parse-result> 1list ;
  69
  70 TUPLE: fail-parser ;
  71
  72 LAZY: fail ( -- parser )
  73   <fail-parser> ;
  74
  75 M: fail-parser (parse) ( input parser -- list )
  76   #! A parser that always fails and returns
  77   #! an empty list of successes.
  78   2drop nil ;
  79
  80 TUPLE: and-parser p1 p2 ;
  81
  82 LAZY: <&> ( parser1 parser2 -- parser )
  83   <and-parser> ;
  84
  85 M: and-parser (parse) ( input parser -- list )
  86   #! Parse 'input' by sequentially combining the
  87   #! two parsers. First parser1 is applied to the
  88   #! input then parser2 is applied to the rest of
  89   #! the input strings from the first parser.
  90   [ and-parser-p1 ] keep and-parser-p2 -rot parse [
  91     dup parse-result-unparsed rot parse
  92     [
  93       >r parse-result-parsed r>
  94       [ parse-result-parsed 2array ] keep
  95       parse-result-unparsed <parse-result>
  96     ] lmap-with
  97   ] lmap-with lconcat ;
  98
  99 TUPLE: or-parser p1 p2 ;
 100
 101 LAZY: <|> ( parser1 parser2 -- parser )
 102   <or-parser> ;
 103
 104 M: or-parser (parse) ( input parser1 -- list )
 105   #! Return the combined list resulting from the parses
 106   #! of parser1 and parser2 being applied to the same
 107   #! input. This implements the choice parsing operator.
 108   [ or-parser-p1 ] keep or-parser-p2 >r dupd parse swap r> parse lappend ;
 109
 110 : ltrim-slice ( string -- string )
 111   #! Return a new string without any leading whitespace
 112   #! from the original string.
 113   dup first blank? [ 1 tail-slice ltrim-slice ] when ;
 114
 115 TUPLE: sp-parser p1 ;
 116
 117 LAZY: sp ( p1 -- parser )
 118   #! Return a parser that first skips all whitespace before
 119   #! calling the original parser.
 120   <sp-parser> ;
 121
 122 M: sp-parser (parse) ( input parser -- list )
 123   #! Skip all leading whitespace from the input then call
 124   #! the parser on the remaining input.
 125   >r ltrim-slice r> sp-parser-p1 parse ;
 126
 127 TUPLE: just-parser p1 ;
 128
 129 LAZY: just ( p1 -- parser )
 130   <just-parser> ;
 131
 132 M: just-parser (parse) ( input parser -- result )
 133   #! Calls the given parser on the input removes
 134   #! from the results anything where the remaining
 135   #! input to be parsed is not empty. So ensures a
 136   #! fully parsed input string.
 137   just-parser-p1 parse [ parse-result-unparsed empty? ] lsubset ;
 138
 139 TUPLE: apply-parser p1 quot ;
 140
 141 LAZY: <@ ( parser quot -- parser )
 142   <apply-parser> ;
 143
 144 M: apply-parser (parse) ( input parser -- result )
 145   #! Calls the parser on the input. For each successfull
 146   #! parse the quot is call with the parse result on the stack.
 147   #! The result of that quotation then becomes the new parse result.
 148   #! This allows modification of parse tree results (like
 149   #! converting strings to integers, etc).
 150   [ apply-parser-p1 ] keep apply-parser-quot
 151   -rot parse [
 152     [ parse-result-parsed swap call ] keep
 153     parse-result-unparsed <parse-result>
 154   ] lmap-with ;
 155
 156 TUPLE: some-parser p1 ;
 157
 158 LAZY: some ( p1 -- parser )
 159   <some-parser> ;
 160
 161 M: some-parser (parse) ( input parser -- result )
 162   #! Calls the parser on the input, guarantees
 163   #! the parse is complete (the remaining input is empty),
 164   #! picks the first solution and only returns the parse
 165   #! tree since the remaining input is empty.
 166   some-parser-p1 just parse car parse-result-parsed ;
 167
 168
 169 LAZY: <& ( parser1 parser2 -- parser )
 170   #! Same as <&> except discard the results of the second parser.
 171   <&> [ first ] <@ ;
 172
 173 LAZY: &> ( parser1 parser2 -- parser )
 174   #! Same as <&> except discard the results of the first parser.
 175   <&> [ second ] <@ ;
 176
 177 LAZY: <:&> ( parser1 parser2 -- result )
 178   #! Same as <&> except flatten the result.
 179   <&> [ dup second swap first [ % , ] { } make ] <@ ;
 180
 181 LAZY: <&:> ( parser1 parser2 -- result )
 182   #! Same as <&> except flatten the result.
 183   <&> [ dup second swap first [ , % ] { } make ] <@ ;
 184
 185 LAZY: <*> ( parser -- parser )
 186   dup <*> <&:> { } succeed <|> ;
 187
 188 LAZY: <+> ( parser -- parser )
 189   #! Return a parser that accepts one or more occurences of the original
 190   #! parser.
 191   dup <*> <&:> ;
 192
 193 LAZY: <?> ( parser -- parser )
 194   #! Return a parser that optionally uses the parser
 195   #! if that parser would be successfull.
 196   [ 1array ] <@ f succeed <|> ;