core/parser/parser.factor

   1 ! Copyright (C) 2005, 2007 Slava Pestov.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: arrays definitions generic assocs kernel math
   4 namespaces prettyprint sequences strings vectors words
   5 quotations inspector io.styles io combinators sorting
   6 splitting math.parser effects continuations debugger
   7 io.files io.streams.string io.streams.lines vocabs
   8 source-files classes hashtables ;
   9 IN: parser
  10
  11 SYMBOL: file
  12
  13 TUPLE: lexer text line column ;
  14
  15 : <lexer> ( text -- lexer ) 1 0 lexer construct-boa ;
  16
  17 : line-text ( lexer -- str )
  18     dup lexer-line 1- swap lexer-text ?nth ;
  19
  20 : location ( -- loc )
  21     file get lexer get lexer-line 2dup and
  22     [ >r source-file-path r> 2array ] [ 2drop f ] if ;
  23
  24 SYMBOL: old-definitions
  25 SYMBOL: new-definitions
  26
  27 TUPLE: redefine-error def ;
  28
  29 M: redefine-error error.
  30     "Re-definition of " write
  31     redefine-error-def . ;
  32
  33 : redefine-error ( definition -- )
  34     \ redefine-error construct-boa
  35     { { "Continue" t } } throw-restarts drop ;
  36
  37 : redefinition? ( definition -- ? )
  38     dup class? [ drop f ] [ new-definitions get key? ] if ;
  39
  40 : (save-location) ( definition loc -- )
  41     over redefinition? [ over redefine-error ] when
  42     over set-where
  43     dup new-definitions get dup [ set-at ] [ 3drop ] if ;
  44
  45 : save-location ( definition -- )
  46     location (save-location) ;
  47
  48 SYMBOL: parser-notes
  49
  50 t parser-notes set-global
  51
  52 : parser-notes? ( -- ? )
  53     parser-notes get "quiet" get not and ;
  54
  55 : file. ( file -- )
  56     [
  57         source-file-path <pathname> pprint
  58     ] [
  59         "<interactive>" write
  60     ] if* ":" write ;
  61
  62 : note. ( str -- )
  63     parser-notes? [
  64         file get file.
  65         lexer get [
  66             lexer-line number>string print
  67         ] [
  68             nl
  69         ] if*
  70         "Note: " write dup print
  71     ] when drop ;
  72
  73 : next-line ( lexer -- )
  74     0 over set-lexer-column
  75     dup lexer-line 1+ swap set-lexer-line ;
  76
  77 : skip ( i seq quot -- n )
  78     over >r find* drop
  79     [ r> drop ] [ r> length ] if* ; inline
  80
  81 : change-column ( lexer quot -- )
  82     swap
  83     [ dup lexer-column swap line-text rot call ] keep
  84     set-lexer-column ; inline
  85
  86 GENERIC: skip-blank ( lexer -- )
  87
  88 M: lexer skip-blank ( lexer -- )
  89     [ [ blank? not ] skip ] change-column ;
  90
  91 GENERIC: skip-word ( lexer -- )
  92
  93 M: lexer skip-word ( lexer -- )
  94     [
  95         2dup nth CHAR: " =
  96         [ drop 1+ ] [ [ blank? ] skip ] if
  97     ] change-column ;
  98
  99 : still-parsing? ( lexer -- ? )
 100     dup lexer-line swap lexer-text length <= ;
 101
 102 : still-parsing-line? ( lexer -- ? )
 103     dup lexer-column swap line-text length < ;
 104
 105 : (parse-token) ( lexer -- str )
 106     [ lexer-column ] keep
 107     [ skip-word ] keep
 108     [ lexer-column ] keep
 109     line-text subseq ;
 110
 111 :  parse-token ( lexer -- str/f )
 112     dup still-parsing? [
 113         dup skip-blank
 114         dup still-parsing-line?
 115         [ (parse-token) ] [ dup next-line parse-token ] if
 116     ] [ drop f ] if ;
 117
 118 : scan ( -- str/f ) lexer get parse-token ;
 119
 120 TUPLE: bad-escape ;
 121
 122 : bad-escape ( -- * ) \ bad-escape construct-empty throw ;
 123
 124 M: bad-escape summary drop "Bad escape code" ;
 125
 126 : escape ( escape -- ch )
 127     H{
 128         { CHAR: e  CHAR: \e }
 129         { CHAR: n  CHAR: \n }
 130         { CHAR: r  CHAR: \r }
 131         { CHAR: t  CHAR: \t }
 132         { CHAR: s  CHAR: \s }
 133         { CHAR: \s CHAR: \s }
 134         { CHAR: 0  CHAR: \0 }
 135         { CHAR: \\ CHAR: \\ }
 136         { CHAR: \" CHAR: \" }
 137     } at [ bad-escape ] unless* ;
 138
 139 : next-escape ( m str -- n ch )
 140     2dup nth CHAR: u =
 141     [ >r 1+ dup 4 + tuck r> subseq hex> ]
 142     [ over 1+ -rot nth escape ] if ;
 143
 144 : next-char ( m str -- n ch )
 145     2dup nth CHAR: \\ =
 146     [ >r 1+ r> next-escape ] [ over 1+ -rot nth ] if ;
 147
 148 : (parse-string) ( m str -- n )
 149     2dup nth CHAR: " =
 150     [ drop 1+ ] [ [ next-char , ] keep (parse-string) ] if ;
 151
 152 : parse-string ( -- str )
 153     lexer get [
 154         [ (parse-string) ] "" make swap
 155     ] change-column ;
 156
 157 TUPLE: parse-error file line col text ;
 158
 159 : <parse-error> ( msg -- error )
 160     file get
 161     lexer get lexer-line
 162     lexer get lexer-column
 163     lexer get line-text
 164     parse-error construct-boa
 165     [ set-delegate ] keep ;
 166
 167 : parse-dump ( error -- )
 168     dup parse-error-file file.
 169     dup parse-error-line number>string print
 170     dup parse-error-text dup string? [ print ] [ drop ] if
 171     parse-error-col 0 or CHAR: \s <string> write
 172     "^" print ;
 173
 174 M: parse-error error.
 175     dup parse-dump  delegate error. ;
 176
 177 SYMBOL: use
 178 SYMBOL: in
 179
 180 : word/vocab% ( word -- )
 181     "(" % dup word-vocabulary % " " % word-name % ")" % ;
 182
 183 : shadow-warning ( new old -- )
 184     2dup eq? [
 185         2drop
 186     ] [
 187         [ word/vocab% " shadowed by " % word/vocab% ] "" make
 188         note.
 189     ] if ;
 190
 191 : shadow-warnings ( vocab vocabs -- )
 192     [
 193         swapd assoc-stack dup
 194         [ shadow-warning ] [ 2drop ] if
 195     ] curry assoc-each ;
 196
 197 : (use+) ( vocab -- )
 198     vocab-words use get 2dup shadow-warnings push ;
 199
 200 : use+ ( vocab -- )
 201     load-vocab (use+) ;
 202
 203 : add-use ( seq -- ) [ use+ ] each ;
 204
 205 : set-use ( seq -- )
 206     [ vocab-words ] map [ ] subset >vector use set ;
 207
 208 : check-vocab-string ( name -- name )
 209     dup string?
 210     [ "Vocabulary name must be a string" throw ] unless ;
 211
 212 : set-in ( name -- )
 213     check-vocab-string dup in set create-vocab (use+) ;
 214
 215 : create-in ( string -- word )
 216     in get create dup set-word dup save-location ;
 217
 218 TUPLE: unexpected want got ;
 219
 220 : unexpected ( want got -- * )
 221     \ unexpected construct-boa throw ;
 222
 223 PREDICATE: unexpected unexpected-eof
 224     unexpected-got not ;
 225
 226 : unexpected-eof ( word -- * ) f unexpected ;
 227
 228 : (parse-tokens) ( accum end -- accum )
 229     scan 2dup = [
 230         2drop
 231     ] [
 232         [ pick push (parse-tokens) ] [ unexpected-eof ] if*
 233     ] if ;
 234
 235 : parse-tokens ( end -- seq )
 236     100 <vector> swap (parse-tokens) >array ;
 237
 238 : CREATE ( -- word ) scan create-in ;
 239
 240 : CREATE-CLASS ( -- word )
 241     scan create-in dup predicate-word save-location ;
 242
 243 : word-restarts ( possibilities -- restarts )
 244     natural-sort [
 245         [ "Use the word " swap summary append ] keep
 246     ] { } map>assoc ;
 247
 248 TUPLE: no-word name ;
 249
 250 M: no-word summary
 251     drop "Word not found in current vocabulary search path" ;
 252
 253 : no-word ( name -- newword )
 254     dup \ no-word construct-boa
 255     swap words-named word-restarts throw-restarts
 256     dup word-vocabulary (use+) ;
 257
 258 : forward-reference? ( word -- ? )
 259     dup old-definitions get key?
 260     swap new-definitions get key? not and ;
 261
 262 TUPLE: forward-error word ;
 263
 264 M: forward-error error.
 265     "Forward reference to " write forward-error-word . ;
 266
 267 : forward-error ( word -- )
 268     \ forward-error construct-boa throw ;
 269
 270 : check-forward ( str word -- word )
 271     dup forward-reference? [
 272         drop
 273         dup use get
 274         [ at ] curry* map [ ] subset
 275         [ forward-reference? not ] find nip
 276         [ ] [ forward-error ] ?if
 277     ] [
 278         nip
 279     ] if ;
 280
 281 : search ( str -- word )
 282     dup use get assoc-stack [ check-forward ] [ no-word ] if* ;
 283
 284 : scan-word ( -- word/number/f )
 285     scan dup [ dup string>number [ ] [ search ] ?if ] when ;
 286
 287 : parse-step ( accum end -- accum ? )
 288     scan-word {
 289         { [ 2dup eq? ] [ 2drop f ] }
 290         { [ dup not ] [ drop unexpected-eof t ] }
 291         { [ dup delimiter? ] [ unexpected t ] }
 292         { [ dup parsing? ] [ nip execute t ] }
 293         { [ t ] [ pick push drop t ] }
 294     } cond ;
 295
 296 : (parse-until) ( accum end -- accum )
 297     dup >r parse-step [ r> (parse-until) ] [ r> drop ] if ;
 298
 299 : parse-until ( end -- vec )
 300     100 <vector> swap (parse-until) ;
 301
 302 : parsed ( accum obj -- accum ) over push ;
 303
 304 : with-parser ( lexer quot -- newquot )
 305     swap lexer set
 306     [ call >quotation ] [ <parse-error> rethrow ] recover ;
 307
 308 : (parse-lines) ( lexer -- quot )
 309     [ f parse-until ] with-parser ;
 310
 311 SYMBOL: lexer-factory
 312
 313 [ <lexer> ] lexer-factory set-global
 314
 315 : parse-lines ( lines -- quot )
 316     lexer-factory get call (parse-lines) ;
 317
 318 ! Parsing word utilities
 319 : parse-effect ( -- effect )
 320     ")" parse-tokens { "--" } split1 dup [
 321         <effect>
 322     ] [
 323         "Stack effect declaration must contain --" throw
 324     ] if ;
 325
 326 TUPLE: bad-number ;
 327
 328 : bad-number ( -- * ) \ bad-number construct-boa throw ;
 329
 330 : parse-base ( parsed base -- parsed )
 331     scan swap base> [ bad-number ] unless* parsed ;
 332
 333 : parse-literal ( accum end quot -- accum )
 334     >r parse-until r> call parsed ; inline
 335
 336 : parse-definition ( -- quot )
 337     \ ; parse-until >quotation ;
 338
 339 GENERIC: expected>string ( obj -- str )
 340
 341 M: f expected>string drop "end of input" ;
 342 M: word expected>string word-name ;
 343 M: string expected>string ;
 344
 345 M: unexpected error.
 346     "Expected " write
 347     dup unexpected-want expected>string write
 348     " but got " write
 349     unexpected-got expected>string print ;
 350
 351 M: bad-number summary
 352     drop "Bad number literal" ;
 353
 354 SYMBOL: bootstrap-syntax
 355
 356 : file-vocabs ( -- )
 357     "scratchpad" in set
 358     { "syntax" "scratchpad" } set-use
 359     bootstrap-syntax get [ use get push ] when* ;
 360
 361 : parse-fresh ( lines -- quot )
 362     [ file-vocabs parse-lines ] with-scope ;
 363
 364 SYMBOL: parse-hook
 365
 366 : do-parse-hook ( -- ) parse-hook get [ call ] when* ;
 367
 368 : parsing-file ( file -- )
 369     "quiet" get [
 370         drop
 371     ] [
 372         "Loading " write <pathname> . flush
 373     ] if ;
 374
 375 : no-parse-hook ( quot -- )
 376     >r f parse-hook r> with-variable do-parse-hook ; inline
 377
 378 : start-parsing ( stream name -- )
 379     H{ } clone new-definitions set
 380     dup [
 381         source-file
 382         dup file set
 383         source-file-definitions clone old-definitions set
 384     ] [ drop ] if
 385     contents \ contents set ;
 386
 387 : smudged-usage-warning ( usages removed -- )
 388     parser-notes? [
 389         "Warning: the following definitions were removed from sources," print
 390         "but are still referenced from other definitions:" print
 391         nl
 392         dup stack.
 393         nl
 394         "The following definitions need to be updated:" print
 395         nl
 396         over stack.
 397     ] when 2drop ;
 398
 399 : outside-usages ( seq -- usages )
 400     dup [
 401         over usage [ pathname? not ] subset seq-diff
 402     ] curry { } map>assoc ;
 403
 404 : filter-moved ( assoc -- newassoc )
 405     [
 406         drop where dup [ first ] when
 407         file get source-file-path =
 408     ] assoc-subset ;
 409
 410 : smudged-usage ( -- usages referenced removed )
 411     new-definitions get old-definitions get diff filter-moved
 412     keys [
 413         outside-usages
 414         [ empty? swap pathname? or not ] assoc-subset
 415         dup values concat prune swap keys
 416     ] keep ;
 417
 418 : forget-smudged ( -- )
 419     smudged-usage [ forget ] each
 420     over empty? [ 2dup smudged-usage-warning ] unless 2drop ;
 421
 422 : record-definitions ( file -- )
 423     new-definitions get swap set-source-file-definitions ;
 424
 425 : finish-parsing ( quot -- )
 426     file get dup [
 427         [ record-form ] keep
 428         [ record-modified ] keep
 429         [ \ contents get record-checksum ] keep
 430         record-definitions
 431         forget-smudged
 432     ] [
 433         2drop
 434     ] if ;
 435
 436 : undo-parsing ( -- )
 437     file get [
 438         dup source-file-definitions new-definitions get union
 439         swap set-source-file-definitions
 440     ] when* ;
 441
 442 : parse-stream ( stream name -- quot )
 443     [
 444         [
 445             start-parsing
 446             \ contents get string-lines parse-fresh
 447             dup finish-parsing
 448         ] [ ] [ undo-parsing ] cleanup
 449     ] no-parse-hook ;
 450
 451 : parse-file-restarts ( file -- restarts )
 452     "Load " swap " again" 3append t 2array 1array ;
 453
 454 : parse-file ( file -- quot )
 455     [
 456         [ parsing-file ] keep
 457         [ ?resource-path <file-reader> ] keep
 458         parse-stream
 459     ] [
 460         over parse-file-restarts rethrow-restarts
 461         drop parse-file
 462     ] recover ;
 463
 464 : run-file ( file -- )
 465     [ [ parse-file call ] keep ] assert-depth drop ;
 466
 467 : reload ( defspec -- )
 468     where first [ run-file ] when* ;
 469
 470 : ?run-file ( path -- )
 471     dup ?resource-path exists? [ run-file ] [ drop ] if ;
 472
 473 : bootstrap-file ( path -- )
 474     [
 475         parse-file [ call ] curry %
 476     ] [
 477         run-file
 478     ] if-bootstrapping ;
 479
 480 : ?bootstrap-file ( path -- )
 481     dup ?resource-path exists? [ bootstrap-file ] [ drop ] if ;
 482
 483 : parse ( str -- quot ) string-lines parse-lines ;
 484
 485 : eval ( str -- ) parse call ;
 486
 487 : eval>string ( str -- output )
 488     [
 489         parser-notes off
 490         [ [ eval ] keep ] try drop
 491     ] string-out ;
 492
 493 global [
 494     {
 495         "scratchpad"
 496         "arrays"
 497         "assocs"
 498         "combinators"
 499         "compiler"
 500         "continuations"
 501         "debugger"
 502         "definitions"
 503         "generic"
 504         "inspector"
 505         "io"
 506         "kernel"
 507         "math"
 508         "memory"
 509         "namespaces"
 510         "parser"
 511         "prettyprint"
 512         "sequences"
 513         "slicing"
 514         "sorting"
 515         "strings"
 516         "syntax"
 517         "vocabs"
 518         "vocabs.loader"
 519         "words"
 520     } set-use
 521     "scratchpad" set-in
 522 ] bind