contrib/parser-combinators/parser-combinators.factor

   1 ! Copyright (C) 2004 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 !
   4 USING: lazy-lists kernel sequences sequences-contrib strings math io arrays errors namespaces ;
   5 IN: parser-combinators
   6
    7 ! Parser combinator protocol: every parser implements (parse).
      ! (parse) takes the input and the parser and, per its stack
      ! effect, leaves a list of results.
    8 GENERIC: (parse) ( input parser -- list )
   9
   10 M: promise (parse) ( input parser -- list )
        #! The parser is a promise: force it, then dispatch (parse)
        #! again on the forced value.
   11   force (parse) ;
  12
   13 LAZY: parse ( input parser -- promise )
        #! Entry point used by the combinators in this file: applies
        #! (parse) to 'input' and 'parser'. Per the stack effect the
        #! word yields a promise; presumably LAZY: (from lazy-lists)
        #! defers the body until the promise is forced -- confirm there.
   14   (parse) ;
  15
   16 TUPLE: parse-result parsed unparsed ; ! one successful parse: value produced + input left over
   17 TUPLE: token-parser string ; ! parser state: the literal string to match
   18
   19 LAZY: token ( string -- parser )
        #! Return a parser that matches the literal 'string' at the
        #! start of the input.
   20   <token-parser> ;
  21
  22 M: token-parser (parse) ( input parser -- list )
  23   token-parser-string swap over ?head-slice [
  24     <parse-result> 1list
  25   ] [
  26     2drop nil
  27   ] if ;
  28
   29 TUPLE: satisfy-parser quot ; ! parser state: the predicate quotation
   30
   31 LAZY: satisfy ( quot -- parser )
        #! Return a parser that accepts the first element of the
        #! input when 'quot', called with that element, yields true.
   32   <satisfy-parser> ;
  33
  34 M: satisfy-parser (parse) ( input parser -- list )
  35   #! A parser that succeeds if the predicate,
  36   #! when passed the first character in the input, returns
  37   #! true.
  38   satisfy-parser-quot >r unclip-slice dup r> call [
  39     swap <parse-result> 1list
  40   ] [
  41     2drop nil
  42   ] if ;
  43
   44 TUPLE: epsilon-parser ; ! no state
   45
   46 LAZY: epsilon ( -- parser )
        #! Return a parser that matches the empty string.
   47   <epsilon-parser> ;
  48
   49 M: epsilon-parser (parse) ( input parser -- list )
   50   #! A parser that parses the empty string. It
   51   #! does not consume any input and always returns
   52   #! a single result whose parse tree is the empty
   53   #! string "" and whose remainder is the unmodified input.
   54   drop "" swap <parse-result> 1list ;
  55
   56 TUPLE: succeed-parser result ; ! parser state: the value reported on every parse
   57
   58 LAZY: succeed ( result -- parser )
        #! Return a parser that always succeeds with 'result' as its
        #! parsed value.
   59   <succeed-parser> ;
  60
   61 M: succeed-parser (parse) ( input parser -- list )
   62   #! Always succeeds without consuming input: the single
   63   #! result has 'result' as parsed value and 'input' as remainder.
   64   succeed-parser-result swap <parse-result> 1list ;
  65
   66 TUPLE: fail-parser ; ! no state
   67
   68 LAZY: fail ( -- parser )
        #! Return a parser that never succeeds.
   69   <fail-parser> ;
  70
   71 M: fail-parser (parse) ( input parser -- list )
   72   #! A parser that always fails: input and parser are both
   73   #! dropped and nil (no successes) is returned.
   74   2drop nil ;
  75
   76 TUPLE: and-parser p1 p2 ; ! parser state: the two parsers to run in sequence
   77
   78 LAZY: <&> ( parser1 parser2 -- parser )
        #! Return a parser that applies parser1 and then parser2 to
        #! what parser1 left unparsed.
   79   <and-parser> ;
  80
  81 M: and-parser (parse) ( input parser -- list )
  82   #! Parse 'input' by sequentially combining the
  83   #! two parsers. First parser1 is applied to the
  84   #! input then parser2 is applied to the rest of
  85   #! the input strings from the first parser.
  86   [ and-parser-p1 ] keep and-parser-p2 -rot parse [
  87     dup parse-result-unparsed rot parse
  88     [
  89       >r parse-result-parsed r>
  90       [ parse-result-parsed 2array ] keep
  91       parse-result-unparsed <parse-result>
  92     ] lmap-with
  93   ] lmap-with lconcat ;
  94
   95 TUPLE: or-parser p1 p2 ; ! parser state: the two alternative parsers
   96
   97 LAZY: <|> ( parser1 parser2 -- parser )
        #! Return a parser that tries both parser1 and parser2 on the
        #! same input.
   98   <or-parser> ;
  99
 100 M: or-parser (parse) ( input parser1 -- list )
 101   #! Return the combined list resulting from the parses
 102   #! of parser1 and parser2 being applied to the same
 103   #! input. This implements the choice parsing operator.
 104   [ or-parser-p1 ] keep or-parser-p2 >r dupd parse swap r> parse lappend ;
 105
 106 : ltrim-slice ( string -- string )
 107   #! Return a new string without any leading whitespace
 108   #! from the original string.
 109   dup first blank? [ 1 tail-slice ltrim-slice ] when ;
 110
  111 TUPLE: sp-parser p1 ; ! parser state: the wrapped parser
  112
  113 LAZY: sp ( p1 -- parser )
  114   #! Return a parser that first skips all leading whitespace
  115   #! and then calls the original parser p1.
  116   <sp-parser> ;
 117
 118 M: sp-parser (parse) ( input parser -- list )
 119   #! Skip all leading whitespace from the input then call
 120   #! the parser on the remaining input.
 121   >r ltrim-slice r> sp-parser-p1 parse ;
 122
  123 TUPLE: just-parser p1 ; ! parser state: the wrapped parser
  124
  125 LAZY: just ( p1 -- parser )
        #! Return a parser that keeps only those results of p1 that
        #! leave no input unparsed.
  126   <just-parser> ;
 127
  128 M: just-parser (parse) ( input parser -- result )
  129   #! Calls the given parser on the input and removes
  130   #! from the results anything where the remaining
  131   #! input to be parsed is not empty. This ensures
  132   #! that only parses of the whole input string are kept.
  133   just-parser-p1 parse [ parse-result-unparsed empty? ] lsubset ;
 134
  135 TUPLE: apply-parser p1 quot ; ! parser state: wrapped parser + quotation applied to its parsed values
  136
  137 LAZY: <@ ( parser quot -- parser )
        #! Return a parser that runs 'parser' and transforms each
        #! parsed value with 'quot'.
  138   <apply-parser> ;
 139
 140 M: apply-parser (parse) ( input parser -- result )
 141   #! Calls the parser on the input. For each successfull
 142   #! parse the quot is call with the parse result on the stack.
 143   #! The result of that quotation then becomes the new parse result.
 144   #! This allows modification of parse tree results (like
 145   #! converting strings to integers, etc).
 146   [ apply-parser-p1 ] keep apply-parser-quot
 147   -rot parse [
 148     [ parse-result-parsed swap call ] keep
 149     parse-result-unparsed <parse-result>
 150   ] lmap-with ;
 151
  152 TUPLE: some-parser p1 ; ! parser state: the wrapped parser
  153
  154 LAZY: some ( p1 -- parser )
        #! Return a parser that yields the parse tree of the first
        #! complete parse found by p1.
  155   <some-parser> ;
 156
  157 M: some-parser (parse) ( input parser -- result )
  158   #! Calls the parser on the input, guarantees
  159   #! the parse is complete (the remaining input is empty),
  160   #! picks the first solution and returns only its parse
  161   #! tree, since the remaining input is empty.
        #! Unlike the other (parse) methods this leaves the parsed
        #! value itself, not a list of parse-results.
        #! NOTE(review): when no complete parse exists, car is applied
        #! to an empty result list -- confirm what lazy-lists does then.
  162   some-parser-p1 just parse car parse-result-parsed ;
 163
 164
  165 LAZY: <& ( parser1 parser2 -- parser )
  166   #! Same as <&>, but keeps only parser1's result (parser2's is discarded).
  167   <&> [ first ] <@ ;
 168
  169 LAZY: &> ( parser1 parser2 -- parser )
  170   #! Same as <&>, but keeps only parser2's result (parser1's is discarded).
  171   <&> [ second ] <@ ;
 172
 173 LAZY: <:&> ( parser1 parser2 -- result )
 174   #! Same as <&> except flatten the result.
 175   <&> [ dup second swap first [ % , ] { } make ] <@ ;
 176
 177 LAZY: <&:> ( parser1 parser2 -- result )
 178   #! Same as <&> except flatten the result.
 179   <&> [ dup second swap first [ , % ] { } make ] <@ ;
 180
  181 LAZY: <*> ( parser -- parser )
        #! Return a parser that accepts zero or more occurrences of
        #! the original parser: either one match followed by <*> of
        #! the same parser (joined with <&:>), or the empty array
        #! with no input consumed.
        #! NOTE(review): the definition refers to itself; presumably
        #! LAZY: defers the body so construction terminates -- confirm.
  182   dup <*> <&:> { } succeed <|> ;
 183
  184 LAZY: <+> ( parser -- parser )
  185   #! Return a parser that accepts one or more occurrences of the
  186   #! original parser: one match followed by <*> of the same parser.
  187   dup <*> <&:> ;
 188
  189 LAZY: <?> ( parser -- parser )
  190   #! Return a parser that optionally uses the parser: a
  191   #! successful parse has its value wrapped in a one-element array.
        #! The alternative 'f succeed' is combined with <|>, so a
        #! result with parsed value f and no input consumed is
        #! always present as well.
  192   [ 1array ] <@ f succeed <|> ;