core/lexer/lexer-docs.factor

   1 USING: help.markup help.syntax kernel strings words quotations ;
   2 IN: lexer
   3
   4 HELP: lexer
   5 { $var-description "Stores the current " { $link lexer } " instance." }
   6 { $class-description "An object for tokenizing parser input. It has the following slots:"
   7     { $slots
   8         { "text" "the lines being parsed; an array of strings" }
   9         { "line" "the line number being parsed; unlike most indices, this is 1-based for friendlier error reporting and integration with text editors" }
  10         { "column" "the current column position, zero-based" }
  11     }
  12 "Custom lexing can be implemented by defining a tuple class that inherits from this class and implementing methods on the " { $link skip-word } " and " { $link skip-blank } " generic words." } ;
  13
  14 HELP: <lexer>
  15 { $values { "text" { $sequence string } } { "lexer" lexer } }
  16 { $description "Creates a new lexer for tokenizing the given sequence of lines." } ;
  17
  18 HELP: next-line
  19 { $values { "lexer" lexer } }
  20 { $description "Advances the lexer to the next input line, discarding the remainder of the current line." } ;
  21
  22 HELP: lexer-error
  23 { $error-description "Thrown when the lexer encounters invalid input. A lexer error wraps an underlying error together with line and column numbers." } ;
  24
  25 HELP: <lexer-error>
  26 { $values { "msg" "an error" } { "error" lexer-error } }
  27 { $description "Creates a new " { $link lexer-error } ", filling in the location information from the current " { $link lexer } "." } ;
  28
  29 HELP: change-lexer-column
  30 { $values { "lexer" lexer } { "quot" { $quotation ( ..a col line -- ..b newcol ) } } }
  31 { $description "Applies a quotation to the current column and line text to produce a new column, and moves the lexer position." } ;
  32
  33 HELP: skip-blank
  34 { $values { "lexer" lexer } }
  35 { $contract "Skips whitespace characters." }
  36 { $notes "Custom lexers can implement this generic word." } ;
  37
  38 HELP: skip-word
  39 { $values { "lexer" lexer } }
  40 { $contract
  41     "Skips until the end of the current token."
  42     $nl
  43     "The default implementation treats a single " { $snippet "\"" } " as a word by itself; otherwise it searches forward until a whitespace character or the end of the line."
  44 }
  45 { $notes "Custom lexers can implement this generic word." } ;
  46
  47 HELP: still-parsing-line?
  48 { $values { "lexer" lexer } { "?" boolean } }
  49 { $description "Outputs " { $link f } " if the end of the current line has been reached, " { $link t } " otherwise." } ;
  50
  51 HELP: parse-token
  52 { $values { "lexer" lexer } { "str/f" { $maybe string } } }
  53 { $description "Reads the next token from the lexer. Tokens are delimited by whitespace, with the exception that " { $snippet "\"" } " is treated like a single token even when not followed by whitespace." } ;
  54
  55 HELP: ?scan-token
  56 { $values { "str/f" { $maybe string } } }
  57 { $description "Reads the next token from the lexer. Tokens are delimited by whitespace, with the exception that " { $snippet "\"" } " is treated like a single token even when not followed by whitespace. This word outputs " { $link f } " on end of input. To throw an error on end of input, use " { $link scan-token } " instead." }
  58 $parsing-note ;
  59
  60 HELP: scan-token
  61 { $values { "str" string } }
  62 { $description "Reads the next token from the lexer. Tokens are delimited by whitespace, with the exception that " { $snippet "\"" } " is treated like a single token even when not followed by whitespace. This word throws " { $link unexpected-eof } " on end of input. To output " { $link f } " on end of input, use " { $link ?scan-token } " instead." }
  63 $parsing-note ;
  64
  65 HELP: still-parsing?
  66 { $values { "lexer" lexer } { "?" boolean } }
  67 { $description "Outputs " { $link f } " if end of input has been reached, " { $link t } " otherwise." } ;
  68
  69 HELP: each-token
  70 { $values { "end" string } { "quot" { $quotation ( ... token -- ... ) } } }
  71 { $description "Reads a sequence of tokens until the first occurrence of " { $snippet "end" } ". " { $snippet "quot" } " is called on each token as it is read." }
  72 { $examples "This word is used to implement " { $link POSTPONE: USING: } "." }
  73 $parsing-note ;
  74
  75 HELP: map-tokens
  76 { $values { "end" string } { "quot" { $quotation ( ... token -- ... elt ) } } { "seq" { $sequence object } } }
  77 { $description "Reads a sequence of tokens until the first occurrence of " { $snippet "end" } ". " { $snippet "quot" } " is called on each token as it is read, and the results are collected into a new output sequence." }
  78 $parsing-note ;
  79
  80 HELP: parse-tokens
  81 { $values { "end" string } { "seq" { $sequence string } } }
  82 { $description "Reads a sequence of tokens until the first occurrence of " { $snippet "end" } ". The tokens remain as strings and are not processed in any way. This word is equivalent to " { $link map-tokens } " with an empty quotation." }
  83 $parsing-note ;
  84
  85 HELP: unexpected
  86 { $values { "want" { $maybe word } } { "got" word } }
  87 { $description "Throws an " { $link unexpected } " error." }
  88 { $error-description "Thrown by the parser if an unmatched closing delimiter is encountered." }
  89 { $examples
  90     "Parsing the following snippet will throw this error:"
  91     { $code "[ 1 2 3 }" }
  92 } ;
  93
  94 HELP: unexpected-eof
  95 { $values { "word" "a " { $link word } } }
  96 { $description "Throws an " { $link unexpected } " error indicating the parser was looking for an occurrence of " { $snippet "word" } " but encountered end of file." } ;
  97
  98 HELP: with-lexer
  99 { $values { "lexer" lexer } { "quot" quotation } { "newquot" quotation } }
 100 { $description "Calls the quotation with the " { $link lexer } " variable set to the given lexer. The quotation can make use of words such as " { $link scan-token } ". Any errors thrown by the quotation are wrapped in " { $link lexer-error } " instances." } ;
 101
 102 ARTICLE: "parser-lexer" "The lexer"
 103 "A variable that encapsulates internal parser state:"
 104 { $subsections lexer }
 105 "Creating a default lexer:"
 106 { $subsections <lexer> }
 107 "A word to test if the end of input has been reached:"
 108 { $subsections still-parsing? }
 109 "A word to advance the lexer to the next line:"
 110 { $subsections next-line }
 111 "Two generic words to override the lexer's token boundary detection:"
 112 { $subsections
 113     skip-blank
 114     skip-word
 115 }
 116 "Utility combinator:"
 117 { $subsections with-lexer } ;