contrib/parser-combinators/parser-combinators.factor

   1 ! Copyright (C) 2004 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 !
   4 USING: lazy-lists kernel sequences strings math io arrays errors namespaces ;
   5 IN: parser-combinators
   6
   7 ! Parser combinator protocol
   8 GENERIC: (parse) ( input parser -- list )
   9
  10 M: promise (parse) ( input parser -- list )
  11   force (parse) ;
  12
  13 : parse ( input parser -- promise )
  14   [ (parse) ] curry curry <promise> ;
  15
  16 TUPLE: parse-result parsed unparsed ;
  17
  18 : ?head-slice ( seq begin -- newseq ? )
  19   2dup head? [ length tail-slice t ] [ drop f ] if ;
  20
  21 : unclip-slice ( seq -- rest first )
  22   dup 1 tail-slice swap first ;
  23
  24 : h:t ( object -- head tail )
  25   #! Return the head and tail of the object.
  26   dup empty? [ dup first swap 1 tail ] unless ;
  27
  28 TUPLE: token-parser string ;
  29
  30 : token ( string -- parser )
  31   <token-parser> ;
  32
  33 M: token-parser (parse) ( input parser -- list )
  34   token-parser-string swap over ?head-slice [
  35     <parse-result> 1list
  36   ] [
  37     2drop nil
  38   ] if ;
  39
  40 TUPLE: satisfy-parser quot ;
  41
  42 : satisfy ( quot -- parser )
  43   <satisfy-parser> ;
  44
  45 M: satisfy-parser (parse) ( input parser -- list )
  46   #! A parser that succeeds if the predicate,
  47   #! when passed the first character in the input, returns
  48   #! true.
  49   satisfy-parser-quot >r unclip-slice dup r> call [
  50     swap <parse-result> 1list
  51   ] [
  52     2drop nil
  53   ] if ;
  54
  55 TUPLE: epsilon-parser ;
  56
  57 : epsilon ( -- list )
  58   <epsilon-parser> ;
  59
  60 M: epsilon-parser (parse) ( input parser -- list )
  61   #! A parser that parses the empty string. It
  62   #! does not consume any input and always returns
  63   #! an empty list as the parse tree with the
  64   #! unmodified input.
  65   drop "" swap <parse-result> 1list ;
  66
  67 TUPLE: succeed-parser result ;
  68
  69 : succeed ( result -- parser )
  70   <succeed-parser> ;
  71
  72 M: succeed-parser (parse) ( input parser -- list )
  73   #! A parser that always returns 'result' as a
  74   #! successful parse with no input consumed.
  75   succeed-parser-result swap <parse-result> 1list ;
  76
  77 TUPLE: fail-parser ;
  78
  79 : fail ( -- parser )
  80   <fail-parser> ;
  81
  82 M: fail-parser (parse) ( input parser -- list )
  83   #! A parser that always fails and returns
  84   #! an empty list of successes.
  85   2drop nil ;
  86
  87 TUPLE: and-parser p1 p2 ;
  88
  89 : <&> ( parser1 parser2 -- parser )
  90   <and-parser> ;
  91
  92 M: and-parser (parse) ( input parser -- list )
  93   #! Parse 'input' by sequentially combining the
  94   #! two parsers. First parser1 is applied to the
  95   #! input then parser2 is applied to the rest of
  96   #! the input strings from the first parser.
  97   [ and-parser-p1 ] keep and-parser-p2 -rot parse [
  98     dup parse-result-unparsed rot parse
  99     [
 100       >r parse-result-parsed r>
 101       [ parse-result-parsed 2array ] keep
 102       parse-result-unparsed <parse-result>
 103     ] lmap-with
 104   ] lmap-with lconcat ;
 105
 106 TUPLE: or-parser p1 p2 ;
 107
 108 : <|> ( parser1 parser2 -- parser )
 109   <or-parser> ;
 110
 111 M: or-parser (parse) ( input parser1 -- list )
 112   #! Return the combined list resulting from the parses
 113   #! of parser1 and parser2 being applied to the same
 114   #! input. This implements the choice parsing operator.
 115   [ or-parser-p1 ] keep or-parser-p2 >r dupd parse swap r> parse lappend ;
 116
 117 : string-ltrim ( string -- string )
 118   #! Return a new string without any leading whitespace
 119   #! from the original string.
 120   dup first blank? [ 1 tail-slice string-ltrim ] when ;
 121
 122 TUPLE: sp-parser p1 ;
 123
 124 : sp ( p1 -- parser )
 125   #! Return a parser that first skips all whitespace before
 126   #! calling the original parser.
 127   <sp-parser> ;
 128
 129 M: sp-parser (parse) ( input parser -- list )
 130   #! Skip all leading whitespace from the input then call
 131   #! the parser on the remaining input.
 132   >r string-ltrim r> sp-parser-p1 parse ;
 133
 134 TUPLE: just-parser p1 ;
 135
 136 : just ( p1 -- parser )
 137   <just-parser> ;
 138
 139 M: just-parser (parse) ( input parser -- result )
 140   #! Calls the given parser on the input removes
 141   #! from the results anything where the remaining
 142   #! input to be parsed is not empty. So ensures a
 143   #! fully parsed input string.
 144   just-parser-p1 parse [ parse-result-unparsed empty? ] lsubset ;
 145
 146 TUPLE: apply-parser p1 quot ;
 147
 148 : <@ ( parser quot -- parser )
 149   <apply-parser> ;
 150
 151 M: apply-parser (parse) ( input parser -- result )
 152   #! Calls the parser on the input. For each successfull
 153   #! parse the quot is call with the parse result on the stack.
 154   #! The result of that quotation then becomes the new parse result.
 155   #! This allows modification of parse tree results (like
 156   #! converting strings to integers, etc).
 157   [ apply-parser-p1 ] keep apply-parser-quot
 158   -rot parse [
 159     [ parse-result-parsed swap call ] keep
 160     parse-result-unparsed <parse-result>
 161   ] lmap-with ;
 162
 163 TUPLE: some-parser p1 ;
 164
 165 : some ( p1 -- parser )
 166   <some-parser> ;
 167
 168 M: some-parser (parse) ( input parser -- result )
 169   #! Calls the parser on the input, guarantees
 170   #! the parse is complete (the remaining input is empty),
 171   #! picks the first solution and only returns the parse
 172   #! tree since the remaining input is empty.
 173   some-parser-p1 just parse car parse-result-parsed ;
 174
 175
 176 : <& ( parser1 parser2 -- parser )
 177   #! Same as <&> except discard the results of the second parser.
 178   <&> [ first ] <@ ;
 179
 180 : &> ( parser1 parser2 -- parser )
 181   #! Same as <&> except discard the results of the first parser.
 182   <&> [ second ] <@ ;
 183
 184 : <:&> ( parser1 parser2 -- result )
 185   #! Same as <&> except flatten the result.
 186   <&> [ dup second swap first [ % , ] { } make ] <@ ;
 187
 188 : <&:> ( parser1 parser2 -- result )
 189   #! Same as <&> except flatten the result.
 190   <&> [ dup second swap first [ , % ] { } make ] <@ ;
 191
 192 : <*> ( parser -- parser )
 193   [ dup <*> <&:> { } succeed <|> ] curry <promise> ;
 194
 195 : (<+>) ( parser -- parser )
 196   #! Non-delayed implementation of <+>
 197   dup <*> <&:> ;
 198
 199 : <+> ( parser -- parser )
 200   #! Return a parser that accepts one or more occurences of the original
 201   #! parser.
 202   [  (<+>) call ] curry ;
 203
 204 : (<?>) ( parser -- parser )
 205   #! Non-delayed implementation of <?>
 206   [ unit ] <@ f succeed <|> ;
 207
 208 : <?> ( parser -- parser )
 209   #! Return a parser that optionally uses the parser
 210   #! if that parser would be successfull.
 211   [  (<?>) call ] curry ;