core/parser/parser.factor

   1 ! Copyright (C) 2005, 2007 Slava Pestov.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: arrays definitions generic assocs kernel math
   4 namespaces prettyprint sequences strings vectors words
   5 quotations inspector io.styles io combinators sorting
   6 splitting math.parser effects continuations debugger
   7 io.files io.streams.string io.streams.lines vocabs
   8 source-files classes hashtables compiler.errors compiler.units
   9 ascii ;
  10 IN: parser
  11
  12 TUPLE: lexer text line column ;
  13
  14 : <lexer> ( text -- lexer ) 1 0 lexer construct-boa ;
  15
  16 : line-text ( lexer -- str )
  17     dup lexer-line 1- swap lexer-text ?nth ;
  18
  19 : location ( -- loc )
  20     file get lexer get lexer-line 2dup and
  21     [ >r source-file-path r> 2array ] [ 2drop f ] if ;
  22
  23 : save-location ( definition -- )
  24     location remember-definition ;
  25
  26 : save-class-location ( class -- )
  27     location remember-class ;
  28
  29 SYMBOL: parser-notes
  30
  31 t parser-notes set-global
  32
  33 : parser-notes? ( -- ? )
  34     parser-notes get "quiet" get not and ;
  35
  36 : file. ( file -- )
  37     [
  38         source-file-path <pathname> pprint
  39     ] [
  40         "<interactive>" write
  41     ] if* ":" write ;
  42
  43 : note. ( str -- )
  44     parser-notes? [
  45         file get file.
  46         lexer get [
  47             lexer-line number>string print
  48         ] [
  49             nl
  50         ] if*
  51         "Note: " write dup print
  52     ] when drop ;
  53
  54 : next-line ( lexer -- )
  55     0 over set-lexer-column
  56     dup lexer-line 1+ swap set-lexer-line ;
  57
  58 : skip ( i seq quot -- n )
  59     over >r find* drop
  60     [ r> drop ] [ r> length ] if* ; inline
  61
  62 : change-column ( lexer quot -- )
  63     swap
  64     [ dup lexer-column swap line-text rot call ] keep
  65     set-lexer-column ; inline
  66
  67 GENERIC: skip-blank ( lexer -- )
  68
  69 M: lexer skip-blank ( lexer -- )
  70     [ [ blank? not ] skip ] change-column ;
  71
  72 GENERIC: skip-word ( lexer -- )
  73
  74 M: lexer skip-word ( lexer -- )
  75     [
  76         2dup nth CHAR: " =
  77         [ drop 1+ ] [ [ blank? ] skip ] if
  78     ] change-column ;
  79
  80 : still-parsing? ( lexer -- ? )
  81     dup lexer-line swap lexer-text length <= ;
  82
  83 : still-parsing-line? ( lexer -- ? )
  84     dup lexer-column swap line-text length < ;
  85
  86 : (parse-token) ( lexer -- str )
  87     [ lexer-column ] keep
  88     [ skip-word ] keep
  89     [ lexer-column ] keep
  90     line-text subseq ;
  91
  92 :  parse-token ( lexer -- str/f )
  93     dup still-parsing? [
  94         dup skip-blank
  95         dup still-parsing-line?
  96         [ (parse-token) ] [ dup next-line parse-token ] if
  97     ] [ drop f ] if ;
  98
  99 : scan ( -- str/f ) lexer get parse-token ;
 100
 101 TUPLE: bad-escape ;
 102
 103 : bad-escape ( -- * )
 104     \ bad-escape construct-empty throw ;
 105
 106 M: bad-escape summary drop "Bad escape code" ;
 107
 108 : escape ( escape -- ch )
 109     H{
 110         { CHAR: e  CHAR: \e }
 111         { CHAR: n  CHAR: \n }
 112         { CHAR: r  CHAR: \r }
 113         { CHAR: t  CHAR: \t }
 114         { CHAR: s  CHAR: \s }
 115         { CHAR: \s CHAR: \s }
 116         { CHAR: 0  CHAR: \0 }
 117         { CHAR: \\ CHAR: \\ }
 118         { CHAR: \" CHAR: \" }
 119     } at [ bad-escape ] unless* ;
 120
 121 : next-escape ( m str -- n ch )
 122     2dup nth CHAR: u =
 123     [ >r 1+ dup 6 + tuck r> subseq hex> ]
 124     [ over 1+ -rot nth escape ] if ;
 125
 126 : next-char ( m str -- n ch )
 127     2dup nth CHAR: \\ =
 128     [ >r 1+ r> next-escape ] [ over 1+ -rot nth ] if ;
 129
 130 : (parse-string) ( m str -- n )
 131     2dup nth CHAR: " =
 132     [ drop 1+ ] [ [ next-char , ] keep (parse-string) ] if ;
 133
 134 : parse-string ( -- str )
 135     lexer get [
 136         [ (parse-string) ] "" make swap
 137     ] change-column ;
 138
 139 TUPLE: parse-error file line col text ;
 140
 141 : <parse-error> ( msg -- error )
 142     file get
 143     lexer get lexer-line
 144     lexer get lexer-column
 145     lexer get line-text
 146     parse-error construct-boa
 147     [ set-delegate ] keep ;
 148
 149 : parse-dump ( error -- )
 150     dup parse-error-file file.
 151     dup parse-error-line number>string print
 152     dup parse-error-text dup string? [ print ] [ drop ] if
 153     parse-error-col 0 or CHAR: \s <string> write
 154     "^" print ;
 155
 156 M: parse-error error.
 157     dup parse-dump  delegate error. ;
 158
 159 SYMBOL: use
 160 SYMBOL: in
 161
 162 : word/vocab% ( word -- )
 163     "(" % dup word-vocabulary % " " % word-name % ")" % ;
 164
 165 : shadow-warning ( new old -- )
 166     2dup eq? [
 167         2drop
 168     ] [
 169         [ word/vocab% " shadowed by " % word/vocab% ] "" make
 170         note.
 171     ] if ;
 172
 173 : shadow-warnings ( vocab vocabs -- )
 174     [
 175         swapd assoc-stack dup
 176         [ shadow-warning ] [ 2drop ] if
 177     ] curry assoc-each ;
 178
 179 : (use+) ( vocab -- )
 180     vocab-words use get 2dup shadow-warnings push ;
 181
 182 : use+ ( vocab -- )
 183     load-vocab (use+) ;
 184
 185 : add-use ( seq -- ) [ use+ ] each ;
 186
 187 : set-use ( seq -- )
 188     [ vocab-words ] map [ ] subset >vector use set ;
 189
 190 : check-vocab-string ( name -- name )
 191     dup string?
 192     [ "Vocabulary name must be a string" throw ] unless ;
 193
 194 : set-in ( name -- )
 195     check-vocab-string dup in set create-vocab (use+) ;
 196
 197 : create-in ( string -- word )
 198     in get create dup set-word dup save-location ;
 199
 200 TUPLE: unexpected want got ;
 201
 202 : unexpected ( want got -- * )
 203     \ unexpected construct-boa throw ;
 204
 205 PREDICATE: unexpected unexpected-eof
 206     unexpected-got not ;
 207
 208 : unexpected-eof ( word -- * ) f unexpected ;
 209
 210 : (parse-tokens) ( accum end -- accum )
 211     scan 2dup = [
 212         2drop
 213     ] [
 214         [ pick push (parse-tokens) ] [ unexpected-eof ] if*
 215     ] if ;
 216
 217 : parse-tokens ( end -- seq )
 218     100 <vector> swap (parse-tokens) >array ;
 219
 220 : CREATE ( -- word ) scan create-in ;
 221
 222 : CREATE-CLASS ( -- word )
 223     scan in get create
 224     dup save-class-location
 225     dup predicate-word dup set-word save-location ;
 226
 227 : word-restarts ( possibilities -- restarts )
 228     natural-sort [
 229         [ "Use the word " swap summary append ] keep
 230     ] { } map>assoc ;
 231
 232 TUPLE: no-word name ;
 233
 234 M: no-word summary
 235     drop "Word not found in current vocabulary search path" ;
 236
 237 : no-word ( name -- newword )
 238     dup \ no-word construct-boa
 239     swap words-named word-restarts throw-restarts
 240     dup word-vocabulary (use+) ;
 241
 242 : check-forward ( str word -- word )
 243     dup forward-reference? [
 244         drop
 245         dup use get
 246         [ at ] with map [ ] subset
 247         [ forward-reference? not ] find nip
 248         [ ] [ forward-error ] ?if
 249     ] [
 250         nip
 251     ] if ;
 252
 253 : search ( str -- word )
 254     dup use get assoc-stack [ check-forward ] [ no-word ] if* ;
 255
 256 : scan-word ( -- word/number/f )
 257     scan dup [ dup string>number [ ] [ search ] ?if ] when ;
 258
 259 TUPLE: staging-violation word ;
 260
 261 : staging-violation ( word -- * )
 262     \ staging-violation construct-boa throw ;
 263
 264 M: staging-violation summary
 265     drop
 266     "A parsing word cannot be used in the same file it is defined in." ;
 267
 268 : execute-parsing ( word -- )
 269     new-definitions get [
 270         dupd first key? [ staging-violation ] when
 271     ] when*
 272     execute ;
 273
 274 : parse-step ( accum end -- accum ? )
 275     scan-word {
 276         { [ 2dup eq? ] [ 2drop f ] }
 277         { [ dup not ] [ drop unexpected-eof t ] }
 278         { [ dup delimiter? ] [ unexpected t ] }
 279         { [ dup parsing? ] [ nip execute-parsing t ] }
 280         { [ t ] [ pick push drop t ] }
 281     } cond ;
 282
 283 : (parse-until) ( accum end -- accum )
 284     dup >r parse-step [ r> (parse-until) ] [ r> drop ] if ;
 285
 286 : parse-until ( end -- vec )
 287     100 <vector> swap (parse-until) ;
 288
 289 : parsed ( accum obj -- accum ) over push ;
 290
 291 : with-parser ( lexer quot -- newquot )
 292     swap lexer set
 293     [ call >quotation ] [ <parse-error> rethrow ] recover ;
 294
 295 : (parse-lines) ( lexer -- quot )
 296     [ f parse-until ] with-parser ;
 297
 298 SYMBOL: lexer-factory
 299
 300 [ <lexer> ] lexer-factory set-global
 301
 302 : parse-lines ( lines -- quot )
 303     lexer-factory get call (parse-lines) ;
 304
 305 ! Parsing word utilities
 306 : parse-effect ( -- effect )
 307     ")" parse-tokens { "--" } split1 dup [
 308         <effect>
 309     ] [
 310         "Stack effect declaration must contain --" throw
 311     ] if ;
 312
 313 TUPLE: bad-number ;
 314
 315 : bad-number ( -- * ) \ bad-number construct-boa throw ;
 316
 317 : parse-base ( parsed base -- parsed )
 318     scan swap base> [ bad-number ] unless* parsed ;
 319
 320 : parse-literal ( accum end quot -- accum )
 321     >r parse-until r> call parsed ; inline
 322
 323 : parse-definition ( -- quot )
 324     \ ; parse-until >quotation ;
 325
 326 GENERIC: expected>string ( obj -- str )
 327
 328 M: f expected>string drop "end of input" ;
 329 M: word expected>string word-name ;
 330 M: string expected>string ;
 331
 332 M: unexpected error.
 333     "Expected " write
 334     dup unexpected-want expected>string write
 335     " but got " write
 336     unexpected-got expected>string print ;
 337
 338 M: bad-number summary
 339     drop "Bad number literal" ;
 340
 341 SYMBOL: bootstrap-syntax
 342
 343 : with-file-vocabs ( quot -- )
 344     [
 345         "scratchpad" in set
 346         { "syntax" "scratchpad" } set-use
 347         bootstrap-syntax get [ use get push ] when*
 348         call
 349     ] with-scope ; inline
 350
 351 SYMBOL: interactive-vocabs
 352
 353 {
 354     "arrays"
 355     "assocs"
 356     "combinators"
 357     "compiler.errors"
 358     "continuations"
 359     "debugger"
 360     "definitions"
 361     "editors"
 362     "generic"
 363     "help"
 364     "inspector"
 365     "io"
 366     "io.files"
 367     "kernel"
 368     "listener"
 369     "math"
 370     "memory"
 371     "namespaces"
 372     "prettyprint"
 373     "sequences"
 374     "slicing"
 375     "sorting"
 376     "strings"
 377     "syntax"
 378     "tools.annotations"
 379     "tools.crossref"
 380     "tools.memory"
 381     "tools.profiler"
 382     "tools.test"
 383     "tools.time"
 384     "vocabs"
 385     "vocabs.loader"
 386     "words"
 387     "scratchpad"
 388 } interactive-vocabs set-global
 389
 390 : with-interactive-vocabs ( quot -- )
 391     [
 392         "scratchpad" in set
 393         interactive-vocabs get set-use
 394         call
 395     ] with-scope ; inline
 396
 397 : parse-fresh ( lines -- quot )
 398     [ parse-lines ] with-file-vocabs ;
 399
 400 : parsing-file ( file -- )
 401     "quiet" get [
 402         drop
 403     ] [
 404         "Loading " write <pathname> . flush
 405     ] if ;
 406
 407 : smudged-usage-warning ( usages removed -- )
 408     parser-notes? [
 409         "Warning: the following definitions were removed from sources," print
 410         "but are still referenced from other definitions:" print
 411         nl
 412         dup stack.
 413         nl
 414         "The following definitions need to be updated:" print
 415         nl
 416         over stack.
 417     ] when 2drop ;
 418
 419 : outside-usages ( seq -- usages )
 420     dup [
 421         over usage [ pathname? not ] subset seq-diff
 422     ] curry { } map>assoc ;
 423
 424 : filter-moved ( assoc -- newassoc )
 425     [
 426         drop where dup [ first ] when
 427         file get source-file-path =
 428     ] assoc-subset ;
 429
 430 : removed-definitions ( -- definitions )
 431     new-definitions old-definitions
 432     [ get first2 union ] 2apply diff ;
 433
 434 : smudged-usage ( -- usages referenced removed )
 435     removed-definitions filter-moved keys [
 436         outside-usages
 437         [ empty? swap pathname? or not ] assoc-subset
 438         dup values concat prune swap keys
 439     ] keep ;
 440
 441 : forget-smudged ( -- )
 442     smudged-usage forget-all
 443     over empty? [ 2dup smudged-usage-warning ] unless 2drop ;
 444
 445 : finish-parsing ( lines quot -- )
 446     file get
 447     [ record-form ] keep
 448     [ record-modified ] keep
 449     [ record-definitions ] keep
 450     record-checksum ;
 451
 452 : parse-stream ( stream name -- quot )
 453     [
 454         [
 455             lines dup parse-fresh
 456             tuck finish-parsing
 457             forget-smudged
 458         ] with-source-file
 459     ] with-compilation-unit ;
 460
 461 : parse-file-restarts ( file -- restarts )
 462     "Load " swap " again" 3append t 2array 1array ;
 463
 464 : parse-file ( file -- quot )
 465     [
 466         [
 467             [ parsing-file ] keep
 468             [ ?resource-path <file-reader> ] keep
 469             parse-stream
 470         ] with-compiler-errors
 471     ] [
 472         over parse-file-restarts rethrow-restarts
 473         drop parse-file
 474     ] recover ;
 475
 476 : run-file ( file -- )
 477     [ [ parse-file call ] keep ] assert-depth drop ;
 478
 479 : ?run-file ( path -- )
 480     dup ?resource-path exists? [ run-file ] [ drop ] if ;
 481
 482 : bootstrap-file ( path -- )
 483     [ parse-file % ] [ run-file ] if-bootstrapping ;
 484
 485 : eval ( str -- )
 486     [ string-lines parse-fresh ] with-compilation-unit call ;
 487
 488 : eval>string ( str -- output )
 489     [
 490         parser-notes off
 491         [ [ eval ] keep ] try drop
 492     ] string-out ;