peg: add 'got' slot to parse-error, so you get a little more info about why the parsi...

author Björn Lindqvist <bjourne@gmail.com>

Tue, 21 Oct 2014 14:27:33 +0000 (16:27 +0200)

committer Doug Coleman <doug.coleman@gmail.com>

Thu, 23 Oct 2014 18:31:27 +0000 (11:31 -0700)
author Björn Lindqvist <bjourne@gmail.com>
Tue, 21 Oct 2014 14:27:33 +0000 (16:27 +0200)
committer Doug Coleman <doug.coleman@gmail.com>
Thu, 23 Oct 2014 18:31:27 +0000 (11:31 -0700)
diff --git a/basis/peg/debugger/debugger-tests.factor b/basis/peg/debugger/debugger-tests.factor

new file mode 100644 (file)

index 0000000..7968abf
--- /dev/null
+++ b/basis/peg/debugger/debugger-tests.factor
@@ -0,0 +1,7 @@
+USING: arrays continuations debugger io.streams.string peg tools.test ;
+IN: peg.debugger.tests
+
+{ "Peg parsing error at character position 0.\nExpected 'A' or 'B'\nGot 'xxxx'\n" } [
+    [ "xxxx" "A" token "B" token 2array choice parse ] [ ] recover
+    [ error. ] with-string-writer
+] unit-test
diff --git a/basis/peg/debugger/debugger.factor b/basis/peg/debugger/debugger.factor

index 7e751b5110c6a24ff7bb44387a0f5385bd0c5986..32e9c182012925bebe2e4451a94ff45bec3a4cb7 100644 (file)
--- a/basis/peg/debugger/debugger.factor
+++ b/basis/peg/debugger/debugger.factor
@@ -1,12 +1,13 @@
-USING: io kernel accessors math.parser sequences prettyprint
+USING: formatting io kernel accessors math.parser sequences prettyprint
  debugger peg ;
  IN: peg.debugger
  
+
  M: parse-error error.
-  "Peg parsing error at character position " write dup position>> number>string write 
-  "." print "Expected " write messages>> [ " or " write ] [ write ] interleave nl ;
+    [ position>> ] [ messages>> " or " join ] [ got>> ] tri
+    "Peg parsing error at character position %d.\nExpected %s\nGot '%s'\n"
+    printf ;
  
  M: parse-failed error.
-  "The " write dup word>> pprint " word could not parse the following input:" print nl
-  input>> . ;
-
+    "The " write dup word>> pprint " word could not parse the following input:" print nl
+    input>> . ;
diff --git a/basis/peg/peg-tests.factor b/basis/peg/peg-tests.factor

index ebfdebfcb15c762022ec6928f8004ed79f94fc2b..2a568c1e9d23ab65e3eda85d600d70700f44275b 100644 (file)
--- a/basis/peg/peg-tests.factor
+++ b/basis/peg/peg-tests.factor
@@ -1,8 +1,8 @@
  ! Copyright (C) 2007 Chris Double.
  ! See http://factorcode.org/license.txt for BSD license.
  !
-USING: kernel tools.test strings namespaces make arrays sequences 
-       peg peg.private peg.parsers words math accessors ;
+USING: continuations kernel tools.test strings namespaces make arrays
+sequences peg peg.private peg.parsers words math accessors ;
  IN: peg.tests
  
  [ ] [ reset-pegs ] unit-test
@@ -50,11 +50,11 @@ IN: peg.tests
  ] unit-test
  
  [
-    "cbcd" "a" token "b" token 2array choice parse 
+    "cbcd" "a" token "b" token 2array choice parse
  ] must-fail
  
  [
-    "" "a" token "b" token 2array choice parse 
+    "" "a" token "b" token 2array choice parse
  ] must-fail
  
  { 0 } [
@@ -98,7 +98,7 @@ IN: peg.tests
  ] unit-test
  
  [
-    "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse 
+    "bb" "a" token ensure CHAR: a CHAR: z range 2array seq parse
  ] must-fail
  
  { t } [
@@ -158,7 +158,7 @@ IN: peg.tests
  ] unit-test
  
  [
-    "a]" "[" token hide "a" token "]" token hide 3array seq parse 
+    "a]" "[" token hide "a" token "]" token hide 3array seq parse
  ] must-fail
  
  
@@ -171,7 +171,7 @@ IN: peg.tests
      "1+1" swap parse
  ] unit-test
  
-: expr ( -- parser ) 
+: expr ( -- parser )
      #! Test direct left recursion. Currently left recursion should cause a
      #! failure of that parser.
      [ expr ] delay "+" token "1" token 3seq "1" token 2choice ;
@@ -188,7 +188,7 @@ IN: peg.tests
  ] unit-test
  
  [
-    "A" [ drop t ] satisfy [ 66 >= ] semantic parse 
+    "A" [ drop t ] satisfy [ 66 >= ] semantic parse
  ] must-fail
  
  { CHAR: B } [
@@ -206,3 +206,13 @@ USE: compiler
  [ ] [ enable-optimizer ] unit-test
  
  [ [ ] ] [ "" epsilon [ drop [ [ ] ] call ] action parse ] unit-test
+
+{
+    T{ parse-error
+       { position 0 }
+       { got "fbcd" }
+       { messages V{ "'a'" "'b'" } }
+    }
+} [
+    [ "fbcd" "a" token "b" token 2array choice parse ] [ ] recover
+] unit-test
diff --git a/basis/peg/peg.factor b/basis/peg/peg.factor

index 6ccd93f3a7b45d42df36439e59fbf7b58a3bbfd9..caa2ebb6c014b8ec2d41a356b7547e8b51c7b349 100644 (file)
--- a/basis/peg/peg.factor
+++ b/basis/peg/peg.factor
@@ -9,7 +9,7 @@ FROM: namespaces => set ;
  IN: peg
  
  TUPLE: parse-result remaining ast ;
-TUPLE: parse-error position messages ; 
+TUPLE: parse-error position got messages ;
  TUPLE: parser peg compiled id ;
  
  M: parser equal? { [ [ class-of ] same? ] [ [ id>> ] same? ] } 2&& ;
@@ -20,6 +20,10 @@ C: <parse-error>  parse-error
  
  SYMBOL: error-stack
  
+: merge-overlapping-errors ( a b -- c )
+    dupd [ messages>> ] bi@ union [ [ position>> ] [ got>> ] bi ] dip
+    <parse-error> ;
+
  : (merge-errors) ( a b -- c )
      {
          { [ over position>> not ] [ nip ] }
@@ -28,7 +32,7 @@ SYMBOL: error-stack
              2dup [ position>> ] compare {
                  { +lt+ [ nip ] }
                  { +gt+ [ drop ] }
-                { +eq+ [ messages>> over messages>> union [ position>> ] dip <parse-error> ] }
+                { +eq+ [ merge-overlapping-errors ] }
              } case
          ]
      } cond ;
@@ -40,7 +44,7 @@ SYMBOL: error-stack
          drop
      ] if ;
  
-: add-error ( remaining message -- )
+: add-error ( position got message -- )
      <parse-error> error-stack get push ;
  
  SYMBOL: ignore
@@ -81,7 +85,7 @@ SYMBOL: lrstack
  : reset-pegs ( -- )
      H{ } clone \ peg-cache set-global ;
  
-reset-pegs 
+reset-pegs
  
  #! An entry in the table of memoized parse results
  #! ast = an AST produced from the parse
@@ -90,10 +94,10 @@ reset-pegs
  #! pos = the position in the input string of this entry
  TUPLE: memo-entry ans pos ;
  
-TUPLE: left-recursion seed rule-id head next ; 
+TUPLE: left-recursion seed rule-id head next ;
  TUPLE: peg-head rule-id involved-set eval-set ;
  
-: rule-id ( word -- id ) 
+: rule-id ( word -- id )
      #! A rule is the parser compiled down to a word. It has
      #! a "peg-id" property containing the id of the original parser.
      "peg-id" word-prop ;
@@ -112,7 +116,7 @@ TUPLE: peg-head rule-id involved-set eval-set ;
          nip [ ast>> ] [ remaining>> ] bi input-from pos set
      ] [
          pos set fail
-    ] if* ; 
+    ] if* ;
  
  : eval-rule ( rule -- ast )
      #! Evaluate a rule, return an ast resulting from it.
@@ -132,7 +136,7 @@ TUPLE: peg-head rule-id involved-set eval-set ;
      swap >>ans pos get >>pos drop ;
  
  : stop-growth? ( ast m -- ? )
-    [ failed? pos get ] dip 
+    [ failed? pos get ] dip
      pos>> <= or ;
  
  : setup-growth ( h p -- )
@@ -272,7 +276,7 @@ GENERIC: (compile) ( peg -- quot )
      #! If not, compile it to a temporary word, cache it,
      #! and return it. Otherwise return the existing one.
      #! Circular parsers are supported by getting the word
-    #! name and storing it in the cache, before compiling, 
+    #! name and storing it in the cache, before compiling,
      #! so it is picked up when re-entered.
      dup compiled>> [
          nip
@@ -333,9 +337,9 @@ TUPLE: token-parser symbol ;
  : parse-token ( input string -- result )
      #! Parse the string, returning a parse result
      [ ?head-slice ] keep swap [
-        <parse-result> f f add-error
+        <parse-result> f f f add-error
      ] [
-        [ drop pos get "token '" ] dip append "'" append 1vector add-error f
+        [ seq>> pos get swap ] dip "'" "'" surround 1vector add-error f
      ] if ;
  
  M: token-parser (compile) ( peg -- quot )
@@ -404,7 +408,7 @@ M: seq-parser (compile) ( peg -- quot )
          [
              parsers>> unclip compile-parser-quot [ parse-seq-element ] curry ,
              [ compile-parser-quot [ merge-errors ] compose [ parse-seq-element ] curry , ] each
-        ] { } make , \ 1&& , 
+        ] { } make , \ 1&& ,
      ] [ ] make ;
  
  TUPLE: choice-parser parsers ;
@@ -421,7 +425,7 @@ TUPLE: repeat0-parser p1 ;
  
  : (repeat) ( quot: ( -- result ) result -- result )
      over call [
-        [ remaining>> swap remaining<< ] 2keep 
+        [ remaining>> swap remaining<< ] 2keep
          ast>> swap [ ast>> push ] keep
          (repeat)
      ] [
@@ -430,7 +434,7 @@ TUPLE: repeat0-parser p1 ;
  
  M: repeat0-parser (compile) ( peg -- quot )
      p1>> compile-parser-quot '[
-        input-slice V{ } clone <parse-result> _ swap (repeat) 
+        input-slice V{ } clone <parse-result> _ swap (repeat)
      ] ;
  
  TUPLE: repeat1-parser p1 ;
@@ -443,8 +447,8 @@ TUPLE: repeat1-parser p1 ;
      ] if* ;
  
  M: repeat1-parser (compile) ( peg -- quot )
-    p1>> compile-parser-quot '[ 
-        input-slice V{ } clone <parse-result> _ swap (repeat) repeat1-empty-check  
+    p1>> compile-parser-quot '[
+        input-slice V{ } clone <parse-result> _ swap (repeat) repeat1-empty-check
      ] ;
  
  TUPLE: optional-parser p1 ;
@@ -500,16 +504,16 @@ TUPLE: sp-parser p1 ;
  
  M: sp-parser (compile) ( peg -- quot )
      p1>> compile-parser-quot '[
-        input-slice [ blank? ] trim-head-slice input-from pos set @ 
+        input-slice [ blank? ] trim-head-slice input-from pos set @
      ] ;
  
  TUPLE: delay-parser quot ;
  
  M: delay-parser (compile) ( peg -- quot )
      #! For efficiency we memoize the quotation.
-    #! This way it is run only once and the 
+    #! This way it is run only once and the
      #! parser constructed once at run time.
-    quot>> gensym [ delayed get set-at ] keep 1quotation ; 
+    quot>> gensym [ delayed get set-at ] keep 1quotation ;
  
  TUPLE: box-parser quot ;
author	Björn Lindqvist <bjourne@gmail.com>
	Tue, 21 Oct 2014 14:27:33 +0000 (16:27 +0200)
committer	Doug Coleman <doug.coleman@gmail.com>
	Thu, 23 Oct 2014 18:31:27 +0000 (11:31 -0700)
basis/peg/debugger/debugger-tests.factor	[new file with mode: 0644]	patch \| blob
basis/peg/debugger/debugger.factor		patch \| blob \| history
basis/peg/peg-tests.factor		patch \| blob \| history
basis/peg/peg.factor		patch \| blob \| history