contrib/parser-combinators/parser-combinators.factor

   1 ! Copyright (C) 2004 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 !
   4 USING: lazy-lists kernel sequences strings math io arrays errors namespaces ;
   5 IN: parser-combinators
   6
   7 ! Parser combinator protocol
   8 GENERIC: (parse) ( input parser -- list )
   9
  10 : parse ( input parser -- promise )
  11   [ (parse) ] curry curry <promise> ;
  12
  13 TUPLE: parse-result parsed unparsed ;
  14
  15 : ?head-slice ( seq begin -- newseq ? )
  16   2dup head? [ length tail-slice t ] [ drop f ] if ;
  17
  18 : unclip-slice ( seq -- rest first )
  19   dup 1 tail-slice swap first ;
  20
  21 : h:t ( object -- head tail )
  22   #! Return the head and tail of the object.
  23   dup empty? [ dup first swap 1 tail ] unless ;
  24
  25 TUPLE: token-parser string ;
  26
  27 : token ( string -- parser )
  28   <token-parser> ;
  29
  30 M: token-parser (parse) ( input parser -- list )
  31   token-parser-string swap over ?head-slice [
  32     <parse-result> 1list
  33   ] [
  34     2drop nil
  35   ] if ;
  36
  37 TUPLE: satisfy-parser quot ;
  38
  39 : satisfy ( quot -- parser )
  40   <satisfy-parser> ;
  41
  42 M: satisfy-parser (parse) ( input parser -- list )
  43   #! A parser that succeeds if the predicate,
  44   #! when passed the first character in the input, returns
  45   #! true.
  46   satisfy-parser-quot >r unclip-slice dup r> call [
  47     swap <parse-result> 1list
  48   ] [
  49     2drop nil
  50   ] if ;
  51
  52 TUPLE: epsilon-parser ;
  53
  54 : epsilon ( -- list )
  55   <epsilon-parser> ;
  56
  57 M: epsilon-parser (parse) ( input parser -- list )
  58   #! A parser that parses the empty string. It
  59   #! does not consume any input and always returns
  60   #! an empty list as the parse tree with the
  61   #! unmodified input.
  62   drop "" swap <parse-result> 1list ;
  63
  64 TUPLE: succeed-parser result ;
  65
  66 : succeed ( result -- parser )
  67   <succeed-parser> ;
  68
  69 M: succeed-parser (parse) ( input parser -- list )
  70   #! A parser that always returns 'result' as a
  71   #! successful parse with no input consumed.
  72   succeed-parser-result swap <parse-result> 1list ;
  73
  74 TUPLE: fail-parser ;
  75
  76 : fail ( -- parser )
  77   <fail-parser> ;
  78
  79 M: fail-parser (parse) ( input parser -- list )
  80   #! A parser that always fails and returns
  81   #! an empty list of successes.
  82   2drop nil ;
  83
  84 TUPLE: and-parser p1 p2 ;
  85
  86 : <&> ( parser1 parser2 -- parser )
  87   <and-parser> ;
  88
  89 M: and-parser (parse) ( input parser -- list )
  90   #! Parse 'input' by sequentially combining the
  91   #! two parsers. First parser1 is applied to the
  92   #! input then parser2 is applied to the rest of
  93   #! the input strings from the first parser.
  94   [ and-parser-p1 ] keep and-parser-p2 -rot parse [
  95     dup parse-result-unparsed rot parse
  96     [
  97       >r parse-result-parsed r>
  98       [ parse-result-parsed 2array ] keep
  99       parse-result-unparsed <parse-result>
 100     ] lmap-with
 101   ] lmap-with lconcat ;
 102
 103 TUPLE: or-parser p1 p2 ;
 104
 105 : <|> ( parser1 parser2 -- parser )
 106   <or-parser> ;
 107
 108 M: or-parser (parse) ( input parser1 -- list )
 109   #! Return the combined list resulting from the parses
 110   #! of parser1 and parser2 being applied to the same
 111   #! input. This implements the choice parsing operator.
 112   [ or-parser-p1 ] keep or-parser-p2 >r dupd parse swap r> parse lappend ;
 113
 114 : string-ltrim ( string -- string )
 115   #! Return a new string without any leading whitespace
 116   #! from the original string.
 117   dup first blank? [ 1 tail-slice string-ltrim ] when ;
 118
 119 TUPLE: sp-parser p1 ;
 120
 121 : sp ( p1 -- parser )
 122   #! Return a parser that first skips all whitespace before
 123   #! calling the original parser.
 124   <sp-parser> ;
 125
 126 M: sp-parser (parse) ( input parser -- list )
 127   #! Skip all leading whitespace from the input then call
 128   #! the parser on the remaining input.
 129   >r string-ltrim r> sp-parser-p1 parse ;
 130
 131 TUPLE: just-parser p1 ;
 132
 133 : just ( p1 -- parser )
 134   <just-parser> ;
 135
 136 M: just-parser (parse) ( input parser -- result )
 137   #! Calls the given parser on the input removes
 138   #! from the results anything where the remaining
 139   #! input to be parsed is not empty. So ensures a
 140   #! fully parsed input string.
 141   just-parser-p1 parse [ parse-result-unparsed empty? ] lsubset ;
 142
 143 : <@-parser ( input parser quot -- result )
 144   #! Calls the parser on the input. For each successfull
 145   #! parse the quot is call with the parse result on the stack.
 146   #! The result of that quotation then becomes the new parse result.
 147   #! This allows modification of parse tree results (like
 148   #! converting strings to integers, etc).
 149   -rot call [
 150     [ parse-result-parsed swap call ] keep
 151     parse-result-unparsed <parse-result>
 152   ] lmap-with ;
 153
 154 : <@ ( parser quot -- parser )
 155   #! Return an <@-parser.
 156   [ <@-parser ] curry curry ;
 157
 158 : some-parser ( input parser -- result )
 159   #! Calls the parser on the input, guarantees
 160   #! the parse is complete (the remaining input is empty),
 161   #! picks the first solution and only returns the parse
 162   #! tree since the remaining input is empty.
 163   just call car parse-result-parsed ;
 164
 165 : some ( parser -- deterministic-parser )
 166   #! Creates a 'some-parser'.
 167   [ some-parser ] curry ;
 168
 169 : <& ( parser1 parser2 -- parser )
 170   #! Same as <&> except discard the results of the second parser.
 171   <&> [ first ] <@ ;
 172
 173 : &> ( parser1 parser2 -- parser )
 174   #! Same as <&> except discard the results of the first parser.
 175   <&> [ second ] <@ ;
 176
 177 : <:&>-parser ( input parser1 parser2 -- result )
 178   #! Same as <&> except flatten the result.
 179   <&> [ dup second swap first [ % , ] { } make ] <@ call ;
 180
 181 : <:&> ( parser1 parser2 -- parser )
 182   #! Same as <&> except flatten the result.
 183   [ <:&>-parser ] curry curry ;
 184
 185 : <&:>-parser ( input parser1 parser2 -- result )
 186   #! Same as <&> except flatten the result.
 187   <&> [ dup second swap first [ , % ] { } make ] <@ call ;
 188
 189 : <&:> ( parser1 parser2 -- parser )
 190   #! Same as <&> except flatten the result.
 191   [ <&:>-parser ] curry curry ;
 192
 193 DEFER: <*>
 194
 195 : (<*>) ( parser -- parser )
 196   #! Non-delayed implementation of <*>
 197   dup <*> <&:> [ ] succeed <|> ;
 198
 199 : <*> ( parser -- parser )
 200   #! Return a parser that accepts zero or more occurences of the original
 201   #! parser.
 202   [  (<*>) call ] curry ;
 203
 204 : (<+>) ( parser -- parser )
 205   #! Non-delayed implementation of <+>
 206   dup <*> <&:> ;
 207
 208 : <+> ( parser -- parser )
 209   #! Return a parser that accepts one or more occurences of the original
 210   #! parser.
 211   [  (<+>) call ] curry ;
 212
 213 : (<?>) ( parser -- parser )
 214   #! Non-delayed implementation of <?>
 215   [ unit ] <@ f succeed <|> ;
 216
 217 : <?> ( parser -- parser )
 218   #! Return a parser that optionally uses the parser
 219   #! if that parser would be successfull.
 220   [  (<?>) call ] curry ;