DLL_OBJS = $(PLAF_DLL_OBJS) \
vm/alien.o \
vm/bignum.o \
+ vm/callstack.o \
+ vm/code_block.o \
+ vm/code_gc.o \
vm/code_heap.o \
+ vm/data_gc.o \
+ vm/data_heap.o \
vm/debug.o \
+ vm/errors.o \
vm/factor.o \
vm/ffi_test.o \
vm/image.o \
vm/io.o \
vm/math.o \
- vm/data_gc.o \
- vm/code_gc.o \
vm/primitives.o \
+ vm/profiler.o \
+ vm/quotations.o \
vm/run.o \
- vm/callstack.o \
vm/types.o \
- vm/quotations.o \
- vm/utilities.o \
- vm/errors.o \
- vm/profiler.o
+ vm/utilities.o
EXE_OBJS = $(PLAF_EXE_OBJS)
[ { "blah" 123 } ] [ { "blah" xyz } expand-constants ] unit-test
-: foo ( -- n ) &: fdafd [ 123 ] unless* ;
-
-[ 123 ] [ foo ] unit-test
-
[ -1 ] [ -1 <char> *char ] unit-test
[ -1 ] [ -1 <short> *short ] unit-test
[ -1 ] [ -1 <int> *int ] unit-test
"void" { "long" } "cdecl" [ sleep ] alien-callback ;
: ?callback ( word -- alien )
- dup compiled>> [ execute ] [ drop f ] if ; inline
+ dup optimized>> [ execute ] [ drop f ] if ; inline
: init-remote-control ( -- )
\ eval-callback ?callback 16 setenv
alien.arrays alien.strings kernel math namespaces parser
sequences words quotations math.parser splitting grouping
effects assocs combinators lexer strings.parser alien.parser
-fry vocabs.parser ;
+fry vocabs.parser words.constant ;
IN: alien.syntax
: DLL" lexer get skip-blank parse-string dlopen parsed ; parsing
: C-ENUM:
";" parse-tokens
- dup length
- [ [ create-in ] dip 1quotation define ] 2each ;
+ [ [ create-in ] dip define-constant ] each-index ;
parsing
+: address-of ( name library -- value )
+ load-library dlsym [ "No such symbol" throw ] unless* ;
+
: &:
- scan "c-library" get
- '[ _ _ load-library dlsym ] over push-all ; parsing
+ scan "c-library" get '[ _ _ address-of ] over push-all ; parsing
{ $values { "str" "a string" } { "upper" "a string" } }\r
{ $description "Converts an ASCII string to upper case." } ;\r
\r
-ARTICLE: "ascii" "ASCII character classes"\r
-"The " { $vocab-link "ascii" } " vocabulary implements traditional ASCII character classes:"\r
+ARTICLE: "ascii" "ASCII"\r
+"The " { $vocab-link "ascii" } " vocabulary implements support for the legacy ASCII character set. Most applications should use " { $link "unicode" } " instead."\r
+$nl\r
+"ASCII character classes:"\r
{ $subsection blank? }\r
{ $subsection letter? }\r
{ $subsection LETTER? }\r
{ $subsection control? }\r
{ $subsection quotable? }\r
{ $subsection ascii? }\r
-"ASCII case conversion is also implemented:"\r
+"ASCII case conversion:"\r
{ $subsection ch>lower }\r
{ $subsection ch>upper }\r
{ $subsection >lower }\r
-{ $subsection >upper }\r
-"Modern applications should use Unicode 5.1 instead (" { $vocab-link "unicode.categories" } ")." ;\r
+{ $subsection >upper } ;\r
\r
ABOUT: "ascii"\r
-! Copyright (C) 2005, 2008 Slava Pestov.\r
+! Copyright (C) 2005, 2009 Slava Pestov.\r
! See http://factorcode.org/license.txt for BSD license.\r
-USING: kernel math math.order sequences\r
-combinators.short-circuit ;\r
+USING: kernel math math.order sequences strings\r
+combinators.short-circuit hints ;\r
IN: ascii\r
\r
: ascii? ( ch -- ? ) 0 127 between? ; inline\r
-\r
: blank? ( ch -- ? ) " \t\n\r" member? ; inline\r
-\r
: letter? ( ch -- ? ) CHAR: a CHAR: z between? ; inline\r
-\r
: LETTER? ( ch -- ? ) CHAR: A CHAR: Z between? ; inline\r
-\r
: digit? ( ch -- ? ) CHAR: 0 CHAR: 9 between? ; inline\r
-\r
: printable? ( ch -- ? ) CHAR: \s CHAR: ~ between? ; inline\r
-\r
-: control? ( ch -- ? )\r
- "\0\e\r\n\t\u000008\u00007f" member? ; inline\r
-\r
-: quotable? ( ch -- ? )\r
- dup printable? [ "\"\\" member? not ] [ drop f ] if ; inline\r
-\r
-: Letter? ( ch -- ? )\r
- [ [ letter? ] [ LETTER? ] ] 1|| ;\r
-\r
-: alpha? ( ch -- ? )\r
- [ [ Letter? ] [ digit? ] ] 1|| ;\r
-\r
-: ch>lower ( ch -- lower )\r
- dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ;\r
-\r
-: >lower ( str -- lower )\r
- [ ch>lower ] map ;\r
-\r
-: ch>upper ( ch -- upper )\r
- dup CHAR: a CHAR: z between? [ HEX: 20 - ] when ;\r
-\r
-: >upper ( str -- upper )\r
- [ ch>upper ] map ;\r
+: control? ( ch -- ? ) "\0\e\r\n\t\u000008\u00007f" member? ; inline\r
+: quotable? ( ch -- ? ) { [ printable? ] [ "\"\\" member? not ] } 1&& ; inline\r
+: Letter? ( ch -- ? ) { [ letter? ] [ LETTER? ] } 1|| ; inline\r
+: alpha? ( ch -- ? ) { [ Letter? ] [ digit? ] } 1|| ; inline\r
+: ch>lower ( ch -- lower ) dup LETTER? [ HEX: 20 + ] when ; inline\r
+: >lower ( str -- lower ) [ ch>lower ] map ;\r
+: ch>upper ( ch -- upper ) dup letter? [ HEX: 20 - ] when ; inline\r
+: >upper ( str -- upper ) [ ch>upper ] map ;\r
+\r
+HINTS: >lower string ;\r
+HINTS: >upper string ;
\ No newline at end of file
-! Copyright (C) 2007, 2008 Slava Pestov.
+! Copyright (C) 2007, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors compiler cpu.architecture vocabs.loader system
sequences namespaces parser kernel kernel.private classes
enable-compiler
-: compile-uncompiled ( words -- )
- [ compiled>> not ] filter compile ;
+: compile-unoptimized ( words -- )
+ [ optimized>> not ] filter compile ;
nl
"Compiling..." write flush
wrap probe
namestack*
-} compile-uncompiled
+} compile-unoptimized
"." write flush
{
bitand bitor bitxor bitnot
-} compile-uncompiled
+} compile-unoptimized
"." write flush
{
+ 1+ 1- 2/ < <= > >= shift
-} compile-uncompiled
+} compile-unoptimized
"." write flush
{
new-sequence nth push pop peek flip
-} compile-uncompiled
+} compile-unoptimized
"." write flush
{
hashcode* = get set
-} compile-uncompiled
+} compile-unoptimized
"." write flush
{
memq? split harvest sift cut cut-slice start index clone
set-at reverse push-all class number>string string>number
-} compile-uncompiled
+} compile-unoptimized
"." write flush
{
lines prefix suffix unclip new-assoc update
word-prop set-word-prop 1array 2array 3array ?nth
-} compile-uncompiled
+} compile-unoptimized
"." write flush
{
malloc calloc free memcpy
-} compile-uncompiled
+} compile-unoptimized
"." write flush
-{ build-tree } compile-uncompiled
+{ build-tree } compile-unoptimized
"." write flush
-{ optimize-tree } compile-uncompiled
+{ optimize-tree } compile-unoptimized
"." write flush
-{ optimize-cfg } compile-uncompiled
+{ optimize-cfg } compile-unoptimized
"." write flush
-{ (compile) } compile-uncompiled
+{ (compile) } compile-unoptimized
"." write flush
-vocabs [ words compile-uncompiled "." write flush ] each
+vocabs [ words compile-unoptimized "." write flush ] each
" done" print flush
-! Copyright (C) 2004, 2008 Slava Pestov.
+! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien arrays byte-arrays generic assocs hashtables assocs
hashtables.private io io.binary io.files io.encodings.binary
vocabs.loader source-files definitions debugger
quotations.private sequences.private combinators
math.order math.private accessors
-slots.private compiler.units ;
+slots.private compiler.units fry ;
IN: bootstrap.image
: arch ( os cpu -- arch )
: put-object ( n obj -- ) (objects) set-at ;
: cache-object ( obj quot -- value )
- [ (objects) ] dip [ obj>> ] prepose cache ; inline
+ [ (objects) ] dip '[ obj>> @ ] cache ; inline
! Constants
SYMBOL: sub-primitives
: make-jit ( quot rc rt offset -- quad )
- { [ { } make ] [ ] [ ] [ ] } spread 4array ; inline
+ [ { } make ] 3dip 4array ; inline
: jit-define ( quot rc rt offset name -- )
[ make-jit ] dip set ; inline
[ emit ] emit-object ;
! Strings
+: native> ( object -- object )
+ big-endian get [ [ be> ] map ] [ [ le> ] map ] if ;
+
: emit-bytes ( seq -- )
- bootstrap-cell <groups>
- big-endian get [ [ be> ] map ] [ [ le> ] map ] if
- emit-seq ;
+ bootstrap-cell <groups> native> emit-seq ;
: pad-bytes ( seq -- newseq )
dup length bootstrap-cell align 0 pad-right ;
-: check-string ( string -- )
- [ 127 > ] contains?
- [ "Bootstrap cannot emit non-ASCII strings" throw ] when ;
+: extended-part ( str -- str' )
+ dup [ 128 < ] all? [ drop f ] [
+ [ -7 shift 1 bitxor ] { } map-as
+ big-endian get
+ [ [ 2 >be ] { } map-as ]
+ [ [ 2 >le ] { } map-as ] if
+ B{ } join
+ ] if ;
+
+: ascii-part ( str -- str' )
+ [
+ [ 128 mod ] [ 128 >= ] bi
+ [ 128 bitor ] when
+ ] B{ } map-as ;
: emit-string ( string -- ptr )
- dup check-string
+ [ length ] [ extended-part ' ] [ ] tri
string type-number object tag-number [
- dup length emit-fixnum
- f ' emit
- f ' emit
- pad-bytes emit-bytes
+ [ emit-fixnum ]
+ [ emit ]
+ [ f ' emit ascii-part pad-bytes emit-bytes ]
+ tri*
] emit-object ;
M: string '
array>> '
quotation type-number object tag-number [
emit ! array
- f ' emit ! compiled>>
+ f ' emit ! compiled
0 emit ! xt
0 emit ! code
] emit-object
! Image output
: (write-image) ( image -- )
- bootstrap-cell big-endian get [
- [ >be write ] curry each
- ] [
- [ >le write ] curry each
- ] if ;
+ bootstrap-cell big-endian get
+ [ '[ _ >be write ] each ]
+ [ '[ _ >le write ] each ] if ;
: write-image ( image -- )
"Writing image to " write
"Core bootstrap completed in " write core-bootstrap-time get print-time
"Bootstrap completed in " write bootstrap-time get print-time
- [ compiled>> ] count-words " compiled words" print
+ [ optimized>> ] count-words " compiled words" print
[ symbol? ] count-words " symbol words" print
[ ] count-words " words total" print
+USE: unicode
\ No newline at end of file
} cond drop ;
: maybe-compile ( word -- )
- dup compiled>> [ drop ] [ queue-compile ] if ;
+ dup optimized>> [ drop ] [ queue-compile ] if ;
SYMBOL: +failed+
[ (compile) yield-hook get call ] slurp-deque ;
: decompile ( word -- )
- f 2array 1array t modify-code-heap ;
+ f 2array 1array modify-code-heap ;
: optimized-recompile-hook ( words -- alist )
[
{ tuple vector } 3 slot { word } declare
dup 1 slot 0 fixnum-bitand { [ ] } dispatch ;
-[ t ] [ \ dispatch-alignment-regression compiled>> ] unit-test
+[ t ] [ \ dispatch-alignment-regression optimized>> ] unit-test
[ vector ] [ dispatch-alignment-regression ] unit-test
GENERIC: xyz ( obj -- obj )
M: array xyz xyz ;
-[ t ] [ \ xyz compiled>> ] unit-test
+[ t ] [ \ xyz optimized>> ] unit-test
! Test predicate inlining
: pred-test-1
! regression
GENERIC: void-generic ( obj -- * )
: breakage ( -- * ) "hi" void-generic ;
-[ t ] [ \ breakage compiled>> ] unit-test
+[ t ] [ \ breakage optimized>> ] unit-test
[ breakage ] must-fail
! regression
! compiling <tuple> with a non-literal class failed
: <tuple>-regression ( class -- tuple ) <tuple> ;
-[ t ] [ \ <tuple>-regression compiled>> ] unit-test
+[ t ] [ \ <tuple>-regression optimized>> ] unit-test
GENERIC: foozul ( a -- b )
M: reversed foozul ;
: node-successor-f-bug ( x -- * )
[ 3 throw ] [ empty-compound ] compose [ 3 throw ] if ;
-[ t ] [ \ node-successor-f-bug compiled>> ] unit-test
+[ t ] [ \ node-successor-f-bug optimized>> ] unit-test
[ ] [ [ new ] build-tree optimize-tree drop ] unit-test
] if
] if ;
-[ t ] [ \ lift-throw-tail-regression compiled>> ] unit-test
+[ t ] [ \ lift-throw-tail-regression optimized>> ] unit-test
[ 3 "an integer" ] [ 3 lift-throw-tail-regression ] unit-test
[ "hi" "a string" ] [ "hi" lift-throw-tail-regression ] unit-test
: recursive-inline-hang-1 ( -- a )
{ } recursive-inline-hang ;
-[ t ] [ \ recursive-inline-hang-1 compiled>> ] unit-test
+[ t ] [ \ recursive-inline-hang-1 optimized>> ] unit-test
DEFER: recursive-inline-hang-3
USE: tools.test
-[ t ] [ \ expr compiled>> ] unit-test
-[ t ] [ \ ast>pipeline-expr compiled>> ] unit-test
+[ t ] [ \ expr optimized>> ] unit-test
+[ t ] [ \ ast>pipeline-expr optimized>> ] unit-test
: hey ( -- ) ;
: there ( -- ) hey ;
-[ t ] [ \ hey compiled>> ] unit-test
-[ t ] [ \ there compiled>> ] unit-test
+[ t ] [ \ hey optimized>> ] unit-test
+[ t ] [ \ there optimized>> ] unit-test
[ ] [ "IN: compiler.tests : hey ( -- ) 3 ;" eval ] unit-test
-[ f ] [ \ hey compiled>> ] unit-test
-[ f ] [ \ there compiled>> ] unit-test
+[ f ] [ \ hey optimized>> ] unit-test
+[ f ] [ \ there optimized>> ] unit-test
[ ] [ "IN: compiler.tests : hey ( -- ) ;" eval ] unit-test
-[ t ] [ \ there compiled>> ] unit-test
+[ t ] [ \ there optimized>> ] unit-test
: good ( -- ) ;
: bad ( -- ) good ;
: ugly ( -- ) bad ;
-[ t ] [ \ good compiled>> ] unit-test
-[ t ] [ \ bad compiled>> ] unit-test
-[ t ] [ \ ugly compiled>> ] unit-test
+[ t ] [ \ good optimized>> ] unit-test
+[ t ] [ \ bad optimized>> ] unit-test
+[ t ] [ \ ugly optimized>> ] unit-test
[ f ] [ \ good compiled-usage assoc-empty? ] unit-test
[ ] [ "IN: compiler.tests : good ( -- ) 3 ;" eval ] unit-test
-[ f ] [ \ good compiled>> ] unit-test
-[ f ] [ \ bad compiled>> ] unit-test
-[ f ] [ \ ugly compiled>> ] unit-test
+[ f ] [ \ good optimized>> ] unit-test
+[ f ] [ \ bad optimized>> ] unit-test
+[ f ] [ \ ugly optimized>> ] unit-test
[ t ] [ \ good compiled-usage assoc-empty? ] unit-test
[ ] [ "IN: compiler.tests : good ( -- ) ;" eval ] unit-test
-[ t ] [ \ good compiled>> ] unit-test
-[ t ] [ \ bad compiled>> ] unit-test
-[ t ] [ \ ugly compiled>> ] unit-test
+[ t ] [ \ good optimized>> ] unit-test
+[ t ] [ \ bad optimized>> ] unit-test
+[ t ] [ \ ugly optimized>> ] unit-test
[ f ] [ \ good compiled-usage assoc-empty? ] unit-test
: sheeple-test ( -- string ) { } sheeple ;
[ "sheeple" ] [ sheeple-test ] unit-test
-[ t ] [ \ sheeple-test compiled>> ] unit-test
+[ t ] [ \ sheeple-test optimized>> ] unit-test
[ t ] [ object \ sheeple method \ sheeple-test "compiled-uses" word-prop key? ] unit-test
[ f ] [ empty-mixin \ sheeple method \ sheeple-test "compiled-uses" word-prop key? ] unit-test
[ ] [ [ array empty-mixin remove-mixin-instance ] with-compilation-unit ] unit-test
[ "sheeple" ] [ sheeple-test ] unit-test
-[ t ] [ \ sheeple-test compiled>> ] unit-test
+[ t ] [ \ sheeple-test optimized>> ] unit-test
[ t ] [ object \ sheeple method \ sheeple-test "compiled-uses" word-prop key? ] unit-test
[ f ] [ empty-mixin \ sheeple method \ sheeple-test "compiled-uses" word-prop key? ] unit-test
10 [
[ "compiler.tests.foo" forget-vocab ] with-compilation-unit
[ t ] [
- "USING: prettyprint words accessors ; IN: compiler.tests.foo : (recursive) ( -- ) (recursive) (recursive) ; inline recursive : recursive ( -- ) (recursive) ; \\ (recursive) compiled>>" eval
+ "USING: prettyprint words accessors ; IN: compiler.tests.foo : (recursive) ( -- ) (recursive) (recursive) ; inline recursive : recursive ( -- ) (recursive) ; \\ (recursive) optimized>>" eval
] unit-test
] times
[ 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 ]
[ 1.0 float-spill-bug ] unit-test
-[ t ] [ \ float-spill-bug compiled>> ] unit-test
+[ t ] [ \ float-spill-bug optimized>> ] unit-test
: float-fixnum-spill-bug ( object -- object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object object )
{
[ 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 2.0 2 ]
[ 1.0 float-fixnum-spill-bug ] unit-test
-[ t ] [ \ float-fixnum-spill-bug compiled>> ] unit-test
+[ t ] [ \ float-fixnum-spill-bug optimized>> ] unit-test
: resolve-spill-bug ( a b -- c )
[ 1 fixnum+fast ] bi@ dup 10 fixnum< [
16 narray
] if ;
-[ t ] [ \ resolve-spill-bug compiled>> ] unit-test
+[ t ] [ \ resolve-spill-bug optimized>> ] unit-test
[ 4 ] [ 1 1 resolve-spill-bug ] unit-test
{ $subsection reply-synchronous }
"An example:"
{ $example
- "USING: concurrency.messaging kernel threads ;"
+ "USING: concurrency.messaging kernel prettyprint threads ;"
+ "IN: scratchpad"
": pong-server ( -- )"
" receive [ \"pong\" ] dip reply-synchronous ;"
"[ pong-server t ] \"pong-server\" spawn-server"
X: XOR. 1 316 31
X1: EXTSB 0 954 31
X1: EXTSB. 1 954 31
-: FMR ( a s -- ) 0 -rot 72 0 63 x-insn ;
-: FMR. ( a s -- ) 0 -rot 72 1 63 x-insn ;
-: FCTIWZ ( a s -- ) 0 -rot 0 15 63 x-insn ;
-: FCTIWZ. ( a s -- ) 0 -rot 1 15 63 x-insn ;
+: FMR ( a s -- ) [ 0 ] 2dip 72 0 63 x-insn ;
+: FMR. ( a s -- ) [ 0 ] 2dip 72 1 63 x-insn ;
+: FCTIWZ ( a s -- ) [ 0 ] 2dip 0 15 63 x-insn ;
+: FCTIWZ. ( a s -- ) [ 0 ] 2dip 1 15 63 x-insn ;
! XO-form
XO: ADD 0 0 266 31
GENERIC# (B) 2 ( dest aa lk -- )
M: integer (B) 18 i-insn ;
-M: word (B) 0 -rot (B) rc-relative-ppc-3 rel-word ;
-M: label (B) 0 -rot (B) rc-relative-ppc-3 label-fixup ;
+M: word (B) [ 0 ] 2dip (B) rc-relative-ppc-3 rel-word ;
+M: label (B) [ 0 ] 2dip (B) rc-relative-ppc-3 label-fixup ;
GENERIC: BC ( a b c -- )
M: integer BC 0 0 16 b-insn ;
[ swap slot-name>> rot set-slot-named ] [ <low-level-binding> ] bi ;
M: postgresql-statement bind-tuple ( tuple statement -- )
- tuck in-params>>
- [ postgresql-bind-conversion ] with map
+ [ nip ] [
+ in-params>>
+ [ postgresql-bind-conversion ] with map
+ ] 2bi
>>bind-params drop ;
M: postgresql-result-set #rows ( result-set -- n )
! High level
ERROR: no-slots-named class seq ;
: check-columns ( class columns -- )
- tuck
- [ [ first ] map ]
- [ all-slots [ name>> ] map ] bi* diff
+ [ nip ] [
+ [ [ first ] map ]
+ [ all-slots [ name>> ] map ] bi* diff
+ ] 2bi
[ drop ] [ no-slots-named ] if-empty ;
: define-persistent ( class table columns -- )
slot-named dup [ no-slot ] unless offset>> ;
: get-slot-named ( name tuple -- value )
- tuck offset-of-slot slot ;
+ [ nip ] [ offset-of-slot ] 2bi slot ;
: set-slot-named ( value name obj -- )
- tuck offset-of-slot set-slot ;
+ [ nip ] [ offset-of-slot ] 2bi set-slot ;
ERROR: not-persistent class ;
-! Copyright (C) 2008 Slava Pestov.
+! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel sequences math ;
+USING: kernel sequences math fry ;
IN: deques
GENERIC: push-front* ( obj deque -- node )
[ peek-back ] [ pop-back* ] bi ;
: slurp-deque ( deque quot -- )
- [ drop [ deque-empty? not ] curry ]
- [ [ pop-back ] prepose curry ] 2bi [ ] while ; inline
+ [ drop '[ _ deque-empty? not ] ]
+ [ '[ _ pop-back @ ] ]
+ 2bi [ ] while ; inline
MIXIN: deque
-! Copyright (C) 2007, 2008 Mackenzie Straight, Doug Coleman,
+! Copyright (C) 2007, 2009 Mackenzie Straight, Doug Coleman,
! Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: combinators kernel math sequences accessors deques
-search-deques summary hashtables ;
+search-deques summary hashtables fry ;
IN: dlists
<PRIVATE
[ front>> ] dip (dlist-find-node) ; inline
: dlist-each-node ( dlist quot -- )
- [ f ] compose dlist-find-node 2drop ; inline
+ '[ @ f ] dlist-find-node 2drop ; inline
: unlink-node ( dlist-node -- )
dup prev>> over next>> set-prev-when
normalize-front ;
: dlist-find ( dlist quot -- obj/f ? )
- [ obj>> ] prepose
- dlist-find-node [ obj>> t ] [ drop f f ] if ; inline
+ '[ obj>> @ ] dlist-find-node [ obj>> t ] [ drop f f ] if ; inline
: dlist-contains? ( dlist quot -- ? )
dlist-find nip ; inline
] if ; inline
: delete-node-if ( dlist quot -- obj/f )
- [ obj>> ] prepose delete-node-if* drop ; inline
+ '[ obj>> @ ] delete-node-if* drop ; inline
M: dlist clear-deque ( dlist -- )
f >>front
drop ;
: dlist-each ( dlist quot -- )
- [ obj>> ] prepose dlist-each-node ; inline
+ '[ obj>> @ ] dlist-each-node ; inline
: dlist>seq ( dlist -- seq )
[ ] accumulator [ dlist-each ] dip ;
: 1dlist ( obj -- dlist ) <dlist> [ push-front ] keep ;
M: dlist clone
- <dlist> [
- [ push-back ] curry dlist-each
- ] keep ;
+ <dlist> [ '[ _ push-back ] dlist-each ] keep ;
INSTANCE: dlist deque
{ $description "Evaluates the Factor code in " { $snippet "str" } " with " { $link output-stream } " rebound to a string output stream, then outputs the resulting string." } ;
ARTICLE: "eval" "Evaluating strings at runtime"
-"Evaluating strings at runtime:"
+"The " { $vocab-link "eval" } " vocabulary implements support for evaluating strings at runtime."
{ $subsection eval }
{ $subsection eval>string } ;
--- /dev/null
+IN: eval.tests
+USING: eval tools.test ;
+
+[ "4\n" ] [ "USING: math prettyprint ; 2 2 + ." eval>string ] unit-test
-! Copyright (C) 2008 Slava Pestov.
+! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: splitting parser compiler.units kernel namespaces
-debugger io.streams.string ;
+debugger io.streams.string fry ;
IN: eval
+: parse-string ( str -- )
+ [ string-lines parse-lines ] with-compilation-unit ;
+
+: (eval) ( str -- )
+ parse-string call ;
+
: eval ( str -- )
- [ string-lines parse-fresh ] with-compilation-unit call ;
+ [ (eval) ] with-file-vocabs ;
-: eval>string ( str -- output )
+: (eval>string) ( str -- output )
[
+ "quiet" on
parser-notes off
- [ [ eval ] keep ] try drop
+ '[ _ (eval) ] try
] with-string-writer ;
+
+: eval>string ( str -- output )
+ [ (eval>string) ] with-file-vocabs ;
\ No newline at end of file
{ $description "Parses Farkup and outputs a tree of " { $link "farkup-ast" } "." } ;
HELP: (write-farkup)
-{ $values { "farkup" "a Farkup syntax tree node" } }
-{ $description "Writes a Farkup syntax tree as HTML on " { $link output-stream } "." } ;
+{ $values { "farkup" "a Farkup syntax tree node" } { "xml" "an XML chunk" } }
+{ $description "Converts a Farkup syntax tree node to XML." } ;
ARTICLE: "farkup-ast" "Farkup syntax tree nodes"
"The " { $link parse-farkup } " word outputs a tree of nodes corresponding to the Farkup syntax of the input string. This tree can be programatically traversed and mutated before being passed on to " { $link write-farkup } "."
[ "<p>=</p><h2>foo</h2>" ] [ "===foo==" convert-farkup ] unit-test
[ "<h1>foo</h1><p>=</p>" ] [ "=foo==" convert-farkup ] unit-test
-[ "<pre><span class='KEYWORD3'>int</span> <span class='FUNCTION'>main</span><span class='OPERATOR'>(</span><span class='OPERATOR'>)</span>\n</pre>" ]
+[ "<pre><span class=\"KEYWORD3\">int</span> <span class=\"FUNCTION\">main</span><span class=\"OPERATOR\">(</span><span class=\"OPERATOR\">)</span></pre>" ]
[ "[c{int main()}]" convert-farkup ] unit-test
-[ "<p><img src='lol.jpg'/></p>" ] [ "[[image:lol.jpg]]" convert-farkup ] unit-test
-[ "<p><img src='lol.jpg' alt='teh lol'/></p>" ] [ "[[image:lol.jpg|teh lol]]" convert-farkup ] unit-test
-[ "<p><a href='http://lol.com'>http://lol.com</a></p>" ] [ "[[http://lol.com]]" convert-farkup ] unit-test
-[ "<p><a href='http://lol.com'>haha</a></p>" ] [ "[[http://lol.com|haha]]" convert-farkup ] unit-test
-[ "<p><a href='Foo/Bar'>Bar</a></p>" ] [ "[[Foo/Bar]]" convert-farkup ] unit-test
+[ "<p><img src=\"lol.jpg\"/></p>" ] [ "[[image:lol.jpg]]" convert-farkup ] unit-test
+[ "<p><img src=\"lol.jpg\" alt=\"teh lol\"/></p>" ] [ "[[image:lol.jpg|teh lol]]" convert-farkup ] unit-test
+[ "<p><a href=\"http://lol.com\">http://lol.com</a></p>" ] [ "[[http://lol.com]]" convert-farkup ] unit-test
+[ "<p><a href=\"http://lol.com\">haha</a></p>" ] [ "[[http://lol.com|haha]]" convert-farkup ] unit-test
+[ "<p><a href=\"Foo/Bar\">Bar</a></p>" ] [ "[[Foo/Bar]]" convert-farkup ] unit-test
"/wiki/view/" relative-link-prefix [
- [ "<p><a href='/wiki/view/Foo/Bar'>Bar</a></p>" ] [ "[[Foo/Bar]]" convert-farkup ] unit-test
+ [ "<p><a href=\"/wiki/view/Foo/Bar\">Bar</a></p>" ] [ "[[Foo/Bar]]" convert-farkup ] unit-test
] with-variable
[ ] [ "[{}]" convert-farkup drop ] unit-test
-[ "<pre>hello\n</pre>" ] [ "[{hello}]" convert-farkup ] unit-test
+[ "<pre>hello</pre>" ] [ "[{hello}]" convert-farkup ] unit-test
[
"<p>Feature comparison:\n<table><tr><td>a</td><td>Factor</td><td>Java</td><td>Lisp</td></tr><tr><td>Coolness</td><td>Yes</td><td>No</td><td>No</td></tr><tr><td>Badass</td><td>Yes</td><td>No</td><td>No</td></tr><tr><td>Enterprise</td><td>Yes</td><td>Yes</td><td>No</td></tr><tr><td>Kosher</td><td>Yes</td><td>No</td><td>Yes</td></tr></table></p>"
] [ "Feature comparison:\n\n|a|Factor|Java|Lisp|\n|Coolness|Yes|No|No|\n|Badass|Yes|No|No|\n|Enterprise|Yes|Yes|No|\n|Kosher|Yes|No|Yes|\n" convert-farkup ] unit-test
[
- "<p>This wiki is written in <a href='Factor'>Factor</a> and is hosted on a <a href='http://linode.com'>http://linode.com</a> virtual server.</p>"
+ "<p>This wiki is written in <a href=\"Factor\">Factor</a> and is hosted on a <a href=\"http://linode.com\">http://linode.com</a> virtual server.</p>"
] [
"This wiki is written in [[Factor]] and is hosted on a [[http://linode.com|http://linode.com]] virtual server."
convert-farkup
] unit-test
-[ "<p><a href='a'>a</a> <a href='b'>c</a></p>" ] [ "[[a]] [[b|c]]" convert-farkup ] unit-test
+[ "<p><a href=\"a\">a</a> <a href=\"b\">c</a></p>" ] [ "[[a]] [[b|c]]" convert-farkup ] unit-test
-[ "<p><a href='C%2b%2b'>C++</a></p>" ] [ "[[C++]]" convert-farkup ] unit-test
+[ "<p><a href=\"C%2b%2b\">C++</a></p>" ] [ "[[C++]]" convert-farkup ] unit-test
[ "<p><foo></p>" ] [ "<foo>" convert-farkup ] unit-test
[ "<hr/>" ] [ "___" convert-farkup ] unit-test
[ "<hr/>\n" ] [ "___\n" convert-farkup ] unit-test
-[ "<p>before:\n<pre><span class='OPERATOR'>{</span> <span class='DIGIT'>1</span> <span class='DIGIT'>2</span> <span class='DIGIT'>3</span> <span class='OPERATOR'>}</span> <span class='DIGIT'>1</span> tail\n</pre></p>" ]
+[ "<p>before:\n<pre><span class=\"OPERATOR\">{</span> <span class=\"DIGIT\">1</span> <span class=\"DIGIT\">2</span> <span class=\"DIGIT\">3</span> <span class=\"OPERATOR\">}</span> <span class=\"DIGIT\">1</span> tail</pre></p>" ]
[ "before:\n[factor{{ 1 2 3 } 1 tail}]" convert-farkup ] unit-test
-[ "<p><a href='Factor'>Factor</a>-rific!</p>" ]
+[ "<p><a href=\"Factor\">Factor</a>-rific!</p>" ]
[ "[[Factor]]-rific!" convert-farkup ] unit-test
[ "<p>[ factor { 1 2 3 }]</p>" ]
convert-farkup string>xml-chunk
"a" deep-tag-named "href" swap at url-decode ;
-[ "Trader Joe's" ] [ "[[Trader Joe's]]" check-link-escaping ] unit-test
+[ "Trader Joe\"s" ] [ "[[Trader Joe\"s]]" check-link-escaping ] unit-test
[ "<foo>" ] [ "[[<foo>]]" check-link-escaping ] unit-test
[ "&blah;" ] [ "[[&blah;]]" check-link-escaping ] unit-test
-[ "C++" ] [ "[[C++]]" check-link-escaping ] unit-test
\ No newline at end of file
+[ "C++" ] [ "[[C++]]" check-link-escaping ] unit-test
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays combinators html.elements io
io.streams.string kernel math namespaces peg peg.ebnf
-sequences sequences.deep strings xml.entities
-vectors splitting xmode.code2html urls.encoding ;
+sequences sequences.deep strings xml.entities xml.interpolate
+vectors splitting xmode.code2html urls.encoding xml.data
+xml.writer ;
IN: farkup
SYMBOL: relative-link-prefix
=> [[ second >string inline-code boa ]]
link-content = (!("|"|"]").)+
+ => [[ >string ]]
image-link = "[[image:" link-content "|" link-content "]]"
=> [[ [ second >string ] [ fourth >string ] bi image boa ]]
simple-code
= "[{" (!("}]").)+ "}]"
- => [[ second f swap code boa ]]
+ => [[ second >string f swap code boa ]]
code = named-code | simple-code
{ [ dup [ 127 > ] contains? ] [ drop invalid-url ] }
{ [ dup first "/\\" member? ] [ drop invalid-url ] }
{ [ CHAR: : over member? ] [ dup absolute-url? [ drop invalid-url ] unless ] }
- [ relative-link-prefix get prepend ]
- } cond ;
+ [ relative-link-prefix get prepend "" like ]
+ } cond url-encode ;
-: escape-link ( href text -- href-esc text-esc )
- [ check-url ] dip escape-string ;
+: write-link ( href text -- xml )
+ [ check-url link-no-follow? get "true" and ] dip
+ [XML <a href=<-> nofollow=<->><-></a> XML] ;
-: write-link ( href text -- )
- escape-link
- [ <a url-encode =href link-no-follow? get [ "true" =nofollow ] when a> ]
- [ write </a> ]
- bi* ;
-
-: write-image-link ( href text -- )
+: write-image-link ( href text -- xml )
disable-images? get [
2drop
- <strong> "Images are not allowed" write </strong>
+ [XML <strong>Images are not allowed</strong> XML]
] [
- escape-link
- [ <img url-encode =src ] [ [ =alt ] unless-empty img/> ] bi*
+ [ check-url ] [ f like ] bi*
+ [XML <img src=<-> alt=<->/> XML]
] if ;
-: render-code ( string mode -- string' )
- [ string-lines ] dip
- [
- <pre>
- htmlize-lines
- </pre>
- ] with-string-writer write ;
-
-GENERIC: (write-farkup) ( farkup -- )
-: <foo.> ( string -- ) <foo> write ;
-: </foo.> ( string -- ) </foo> write ;
-: in-tag. ( obj quot string -- ) [ <foo.> call ] keep </foo.> ; inline
-M: heading1 (write-farkup) [ child>> (write-farkup) ] "h1" in-tag. ;
-M: heading2 (write-farkup) [ child>> (write-farkup) ] "h2" in-tag. ;
-M: heading3 (write-farkup) [ child>> (write-farkup) ] "h3" in-tag. ;
-M: heading4 (write-farkup) [ child>> (write-farkup) ] "h4" in-tag. ;
-M: strong (write-farkup) [ child>> (write-farkup) ] "strong" in-tag. ;
-M: emphasis (write-farkup) [ child>> (write-farkup) ] "em" in-tag. ;
-M: superscript (write-farkup) [ child>> (write-farkup) ] "sup" in-tag. ;
-M: subscript (write-farkup) [ child>> (write-farkup) ] "sub" in-tag. ;
-M: inline-code (write-farkup) [ child>> (write-farkup) ] "code" in-tag. ;
-M: list-item (write-farkup) [ child>> (write-farkup) ] "li" in-tag. ;
-M: unordered-list (write-farkup) [ child>> (write-farkup) ] "ul" in-tag. ;
-M: ordered-list (write-farkup) [ child>> (write-farkup) ] "ol" in-tag. ;
-M: paragraph (write-farkup) [ child>> (write-farkup) ] "p" in-tag. ;
-M: link (write-farkup) [ href>> ] [ text>> ] bi write-link ;
-M: image (write-farkup) [ href>> ] [ text>> ] bi write-image-link ;
-M: code (write-farkup) [ string>> ] [ mode>> ] bi render-code ;
-M: line (write-farkup) drop <hr/> ;
-M: line-break (write-farkup) drop <br/> nl ;
-M: table-row (write-farkup) ( obj -- )
- child>> [ [ [ (write-farkup) ] "td" in-tag. ] each ] "tr" in-tag. ;
-M: table (write-farkup) [ child>> (write-farkup) ] "table" in-tag. ;
-M: string (write-farkup) escape-string write ;
-M: vector (write-farkup) [ (write-farkup) ] each ;
-M: f (write-farkup) drop ;
+: render-code ( string mode -- xml )
+ [ string-lines ] dip htmlize-lines
+ [XML <pre><-></pre> XML] ;
-: write-farkup ( string -- )
+GENERIC: (write-farkup) ( farkup -- xml )
+
+: farkup-inside ( farkup name -- xml )
+ <simple-name> swap T{ attrs } swap
+ child>> (write-farkup) 1array <tag> ;
+
+M: heading1 (write-farkup) "h1" farkup-inside ;
+M: heading2 (write-farkup) "h2" farkup-inside ;
+M: heading3 (write-farkup) "h3" farkup-inside ;
+M: heading4 (write-farkup) "h4" farkup-inside ;
+M: strong (write-farkup) "strong" farkup-inside ;
+M: emphasis (write-farkup) "em" farkup-inside ;
+M: superscript (write-farkup) "sup" farkup-inside ;
+M: subscript (write-farkup) "sub" farkup-inside ;
+M: inline-code (write-farkup) "code" farkup-inside ;
+M: list-item (write-farkup) "li" farkup-inside ;
+M: unordered-list (write-farkup) "ul" farkup-inside ;
+M: ordered-list (write-farkup) "ol" farkup-inside ;
+M: paragraph (write-farkup) "p" farkup-inside ;
+M: table (write-farkup) "table" farkup-inside ;
+
+M: link (write-farkup)
+ [ href>> ] [ text>> ] bi write-link ;
+
+M: image (write-farkup)
+ [ href>> ] [ text>> ] bi write-image-link ;
+
+M: code (write-farkup)
+ [ string>> ] [ mode>> ] bi render-code ;
+
+M: line (write-farkup)
+ drop [XML <hr/> XML] ;
+
+M: line-break (write-farkup)
+ drop [XML <br/> XML] ;
+
+M: table-row (write-farkup)
+ child>>
+ [ (write-farkup) [XML <td><-></td> XML] ] map
+ [XML <tr><-></tr> XML] ;
+
+M: string (write-farkup) ;
+
+M: vector (write-farkup) [ (write-farkup) ] map ;
+
+M: f (write-farkup) ;
+
+: farkup>xml ( string -- xml )
parse-farkup (write-farkup) ;
+: write-farkup ( string -- )
+ farkup>xml write-xml-chunk ;
+
: convert-farkup ( string -- string' )
- parse-farkup [ (write-farkup) ] with-string-writer ;
+ [ write-farkup ] with-string-writer ;
{ $values { "format-string" string } }
{ $description
"Writes the arguments (specified on the stack) formatted according to the format string.\n"
- "\n"
+ $nl
"Several format specifications exist for handling arguments of different types, and "
"specifying attributes for the result string, including such things as maximum width, "
"padding, and decimals.\n"
{ "%+Px" "Hexadecimal" "hex" }
{ "%+PX" "Hexadecimal uppercase" "hex" }
}
- "\n"
+ $nl
"A plus sign ('+') is used to optionally specify that the number should be "
"formatted with a '+' preceeding it if positive.\n"
- "\n"
+ $nl
"Padding ('P') is used to optionally specify the minimum width of the result "
"string, the padding character, and the alignment. By default, the padding "
"character defaults to a space and the alignment defaults to right-aligned. "
"\"%'#5f\" formats a float padding with '#' up to 3 characters wide."
"\"%-10d\" formats an integer to 10 characters wide and left-aligns."
}
- "\n"
+ $nl
"Digits ('D') is used to optionally specify the maximum digits in the result "
"string. For example:\n"
{ $list
{ $values { "format-string" string } }
{ $description
"Writes the timestamp (specified on the stack) formatted according to the format string.\n"
- "\n"
+ $nl
"Different attributes of the timestamp can be retrieved using format specifications.\n"
{ $table
{ "%a" "Abbreviated weekday name." }
} ;
ARTICLE: "formatting" "Formatted printing"
-"The " { $vocab-link "formatting" } " vocabulary is used for formatted printing.\n"
+"The " { $vocab-link "formatting" } " vocabulary is used for formatted printing."
{ $subsection printf }
{ $subsection sprintf }
{ $subsection strftime }
"'[ [ _ key? ] all? ] filter"\r
"[ [ key? ] curry all? ] curry filter"\r
}\r
-"There is a mapping from fried quotations to lexical closures as defined in the " { $vocab-link "locals" } " vocabulary. Namely, a fried quotation is equivalent to a ``let'' form where each local binding is only used once, and bindings are used in the same order in which they are defined. The following two lines are equivalent:"\r
+"There is a mapping from fried quotations to lexical closures as defined in the " { $vocab-link "locals" } " vocabulary. Namely, a fried quotation is equivalent to a “let” form where each local binding is only used once, and bindings are used in the same order in which they are defined. The following two lines are equivalent:"\r
{ $code\r
"'[ 3 _ + 4 _ / ]"\r
"[let | a [ ] b [ ] | [ 3 a + 4 b / ] ]"\r
} ;\r
\r
ARTICLE: "fry" "Fried quotations"\r
+"The " { $vocab-link "fry" } " vocabulary implements " { $emphasis "fried quotation" } ". Conceptually, fried quotations are quotations with “holes” (more formally, " { $emphasis "fry specifiers" } "), and the holes are filled in when the fried quotation is pushed on the stack."\r
+"The " { $vocab-link "fry" } " vocabulary implements " { $emphasis "fried quotation" } ". Conceptually, fried quotations are quotations with “holes†(more formally, " { $emphasis "fry specifiers" } "), and the holes are filled in when the fried quotation is pushed on the stack."\r
$nl\r
"Fried quotations are started by a special parsing word:"\r
{ $subsection POSTPONE: '[ }\r
+"There are two types of fry specifiers; the first can hold a value, and the second “splices” a quotation, as if it were inserted without surrounding brackets:"\r
+"There are two types of fry specifiers; the first can hold a value, and the second “splices†a quotation, as if it were inserted without surrounding brackets:"\r
{ $subsection _ }\r
{ $subsection @ }\r
"The holes are filled in with the top of stack going in the rightmost hole, the second item on the stack going in the second hole from the right, and so on."\r
{ $subsection "furnace.auth.providers.db" } ;
ARTICLE: "furnace.auth.features" "Optional authentication features"
-"Vocabularies having names prefixed by " { $code "furnace.auth.features" } " implement optional features which can be enabled by calling special words. These words define new actions on an authentication realm."
+"Vocabularies having names prefixed by " { $code "furnace.auth.features" } " implement optional features which can be enabled by calling special words. These words define new actions on an authentication realm."
{ $subsection "furnace.auth.features.deactivate-user" }
{ $subsection "furnace.auth.features.edit-profile" }
{ $subsection "furnace.auth.features.recover-password" }
"User profile variables have the same restrictions on their values as session variables; see " { $link "furnace.sessions.serialize" } " for a discussion." ;
ARTICLE: "furnace.auth.example" "Furnace authentication example"
-"The " { $vocab-link "webapps.todo" } " vocabulary wraps all of its responders in a protected responder. The " { $slot "description" } " slot is set so that the login page contains the message ``You must log in to view your todo list'':"
+"The " { $vocab-link "webapps.todo" } " vocabulary wraps all of its responders in a protected responder. The " { $slot "description" } " slot is set so that the login page contains the message “You must log in to view your todo list”:"
{ $code
<" <protected>
"view your todo list" >>description">
over email>> 1array >>to
[
"This e-mail was sent by the application server on " % current-host % "\n" %
- "because somebody, maybe you, clicked on a ``recover password'' link in the\n" %
+ "because somebody, maybe you, clicked on a “recover password” link in the\n" %
"login form, and requested a new password for the user named ``" %
over username>> % "''.\n" %
"\n" %
HELP: feed-entry-description
{ $values
{ "object" object }
- { "description" null }
+ { "description" string }
}
{ $contract "Outputs a feed entry description." } ;
dup method>> {
{ "GET" [ url>> query>> ] }
{ "HEAD" [ url>> query>> ] }
- { "POST" [
- post-data>>
- dup content-type>> "application/x-www-form-urlencoded" =
- [ content>> ] [ drop f ] if
- ] }
+ { "POST" [ post-data>> params>> ] }
} case ;
: referrer ( -- referrer/f )
! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel math math.order strings arrays vectors sequences
-sequences.private accessors ;
+sequences.private accessors fry ;
IN: grouping
<PRIVATE
[ first2-unsafe ] dip call
] [
[ 2 <sliced-clumps> ] dip
- [ first2-unsafe ] prepose all?
+ '[ first2-unsafe @ ] all?
] if
] if ; inline
: random-alist ( n -- alist )
[
- [
- 32 random-bits dup number>string swap set
- ] times
- ] H{ } make-assoc ;
+ drop 32 random-bits dup number>string
+ ] H{ } map>assoc ;
: test-heap-sort ( n -- ? )
random-alist dup >alist sort-keys swap heap-sort = ;
{ $code "\"file.txt\" utf16 file-contents" }
"Encoding descriptors are also used by " { $link "io.streams.byte-array" } " and taken by combinators like " { $link with-file-writer } " and " { $link with-byte-reader } " which deal with streams. It is " { $emphasis "not" } " used with " { $link "io.streams.string" } " because these deal with abstract text."
$nl
-"When the " { $link binary } " encoding is used, a " { $link byte-array } " is expected for writing and returned for reading, since the stream deals with bytes. All other encodings deal with strings, since they are used to represent text." ;
+"When the " { $link binary } " encoding is used, a " { $link byte-array } " is expected for writing and returned for reading, since the stream deals with bytes. All other encodings deal with strings, since they are used to represent text."
+{ $see-also "stream-elements" } ;
ARTICLE: "io" "Input and output"
{ $heading "Streams" }
"Elements used in " { $link $values } " forms:"
{ $subsection $instance }
{ $subsection $maybe }
+{ $subsection $or }
{ $subsection $quotation }
"Boilerplate paragraphs:"
{ $subsection $low-level-note }
{ "an array of markup elements," }
{ "or an array of the form " { $snippet "{ $directive content... }" } ", where " { $snippet "$directive" } " is a markup word whose name starts with " { $snippet "$" } ", and " { $snippet "content..." } " is a series of markup elements" }
}
+"Here is a more formal schema for the help markup language:"
+{ $code
+"<element> ::== <string> | <simple-element> | <fancy-element>"
+"<simple-element> ::== { <element>* }"
+"<fancy-element> ::== { <type> <element> }"
+}
{ $subsection "element-types" }
{ $subsection "printing-elements" }
"Related words can be cross-referenced:"
"The help system maintains documentation written in a simple markup language, along with cross-referencing and search. Documentation can either exist as free-standing " { $emphasis "articles" } " or be associated with words."
{ $subsection "browsing-help" }
{ $subsection "writing-help" }
-{ $vocab-subsection "Help lint tool" "help.lint" }
+{ $subsection "help.lint" }
{ $subsection "help-impl" } ;
IN: help
-! Copyright (C) 2006, 2008 Slava Pestov.
+! Copyright (C) 2006, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: fry accessors sequences parser kernel help help.markup
help.topics words strings classes tools.vocabs namespaces make
combinators combinators.short-circuit splitting debugger
hashtables sorting effects vocabs vocabs.loader assocs editors
continuations classes.predicate macros math sets eval
-vocabs.parser words.symbol values ;
+vocabs.parser words.symbol values grouping unicode.categories
+sequences.deep ;
IN: help.lint
+SYMBOL: vocabs-quot
+
: check-example ( element -- )
- rest [
- but-last "\n" join 1vector
- [
- use [ clone ] change
- [ eval>string ] with-datastack
- ] with-scope peek "\n" ?tail drop
- ] keep
- peek assert= ;
+ [
+ rest [
+ but-last "\n" join 1vector
+ [ (eval>string) ] with-datastack
+ peek "\n" ?tail drop
+ ] keep
+ peek assert=
+ ] vocabs-quot get call ;
-: check-examples ( word element -- )
- nip \ $example swap elements [ check-example ] each ;
+: check-examples ( element -- )
+ \ $example swap elements [ check-example ] each ;
: extract-values ( element -- seq )
\ $values swap elements dup empty? [
]
} 2|| [ "$values don't match stack effect" throw ] unless ;
-: check-see-also ( word element -- )
- nip \ $see-also swap elements [
+: check-nulls ( element -- )
+ \ $values swap elements
+ null swap deep-member?
+ [ "$values should not contain null" throw ] when ;
+
+: check-see-also ( element -- )
+ \ $see-also swap elements [
rest dup prune [ length ] bi@ assert=
] each ;
] each ;
: check-rendering ( element -- )
- [ print-topic ] with-string-writer drop ;
+ [ print-content ] with-string-writer drop ;
+
+: check-strings ( str -- )
+ [
+ "\n\t" intersects?
+ [ "Paragraph text should not contain \\n or \\t" throw ] when
+ ] [
+ " " swap subseq?
+ [ "Paragraph text should not contain double spaces" throw ] when
+ ] bi ;
+
+: check-whitespace ( str1 str2 -- )
+ [ " " tail? ] [ " " head? ] bi* or
+ [ "Missing whitespace between strings" throw ] unless ;
+
+: check-bogus-nl ( element -- )
+ { { $nl } { { $nl } } } [ head? ] with contains?
+ [ "Simple element should not begin with a paragraph break" throw ] when ;
+
+: check-elements ( element -- )
+ {
+ [ check-bogus-nl ]
+ [ [ string? ] filter [ check-strings ] each ]
+ [ [ simple-element? ] filter [ check-elements ] each ]
+ [ 2 <clumps> [ [ string? ] all? ] filter [ first2 check-whitespace ] each ]
+ } cleave ;
+
+: check-markup ( element -- )
+ {
+ [ check-elements ]
+ [ check-rendering ]
+ [ check-examples ]
+ [ check-modules ]
+ } cleave ;
: all-word-help ( words -- seq )
[ word-help ] filter ;
-TUPLE: help-error topic error ;
+TUPLE: help-error error topic ;
C: <help-error> help-error
M: help-error error.
- "In " write dup topic>> pprint nl
- error>> error. ;
+ [ "In " write topic>> pprint nl ]
+ [ error>> error. ]
+ bi ;
: check-something ( obj quot -- )
- flush [ <help-error> , ] recover ; inline
+ flush '[ _ assert-depth ] swap '[ _ <help-error> , ] recover ; inline
: check-word ( word -- )
+ [ with-file-vocabs ] vocabs-quot set
dup word-help [
- [
- dup word-help '[
- _ _ {
- [ check-examples ]
- [ check-values ]
- [ check-see-also ]
- [ [ check-rendering ] [ check-modules ] bi* ]
- } 2cleave
- ] assert-depth
+ dup '[
+ _ dup word-help
+ [ check-values ]
+ [ nip [ check-nulls ] [ check-see-also ] [ check-markup ] tri ] 2bi
] check-something
] [ drop ] if ;
: check-words ( words -- ) [ check-word ] each ;
+: check-article-title ( article -- )
+ article-title first LETTER?
+ [ "Article title must begin with a capital letter" throw ] unless ;
+
: check-article ( article -- )
- [
- dup article-content
- '[ _ check-rendering _ check-modules ]
- assert-depth
+ [ with-interactive-vocabs ] vocabs-quot set
+ dup '[
+ _
+ [ check-article-title ]
+ [ article-content check-markup ] bi
] check-something ;
: files>vocabs ( -- assoc )
] keep ;
: check-about ( vocab -- )
- [ vocab-help [ article drop ] when* ] check-something ;
+ dup '[ _ vocab-help [ article drop ] when* ] check-something ;
: check-vocab ( vocab -- seq )
"Checking " write dup write "..." print
USING: definitions help help.markup kernel sequences tools.test
-words parser namespaces assocs generic io.streams.string accessors ;
+words parser namespaces assocs generic io.streams.string accessors
+strings math ;
IN: help.markup.tests
TUPLE: blahblah quux ;
[ ] [ \ fooey print-topic ] unit-test
[ ] [ gensym print-topic ] unit-test
+
+[ "a string" ]
+[ [ { $or string } print-element ] with-string-writer ] unit-test
+
+[ "a string or an integer" ]
+[ [ { $or string integer } print-element ] with-string-writer ] unit-test
+
+[ "a string, a fixnum, or an integer" ]
+[ [ { $or string fixnum integer } print-element ] with-string-writer ] unit-test
-! Copyright (C) 2005, 2008 Slava Pestov.
+! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays definitions generic io kernel assocs
hashtables namespaces make parser prettyprint sequences strings
io.styles vectors words math sorting splitting classes slots
-vocabs help.stylesheet help.topics vocabs.loader quotations ;
+vocabs help.stylesheet help.topics vocabs.loader quotations
+combinators ;
IN: help.markup
-! Simple markup language.
-
-! <element> ::== <string> | <simple-element> | <fancy-element>
-! <simple-element> ::== { <element>* }
-! <fancy-element> ::== { <type> <element> }
-
-! Element types are words whose name begins with $.
-
PREDICATE: simple-element < array
[ t ] [ first word? not ] if-empty ;
: $instance ( element -- ) first ($instance) ;
+: $or ( element -- )
+ dup length {
+ { 1 [ first ($instance) ] }
+ { 2 [ first2 [ ($instance) " or " print-element ] [ ($instance) ] bi* ] }
+ [
+ drop
+ unclip-last
+ [ [ ($instance) ", " print-element ] each ]
+ [ "or " print-element ($instance) ]
+ bi*
+ ]
+ } case ;
+
: $maybe ( element -- )
- $instance " or " print-element { f } $instance ;
+ f suffix $or ;
: $quotation ( element -- )
{ "a " { $link quotation } " with stack effect " } print-element
"! See http://factorcode.org/license.txt for BSD license."
"IN: palindrome"
}
-"We will now write our first word using " { $link POSTPONE: : } ". This word will test if a string is a palindrome; it will take a string as input, and give back a boolean as output. We will call this word " { $snippet "palindrome?" } ", following a naming convention that words returning booleans have names ending with " { $snippet "?" } "."
+"We will now write our first word using " { $link POSTPONE: : } ". This word will test if a string is a palindrome; it will take a string as input, and give back a boolean as output. We will call this word " { $snippet "palindrome?" } ", following a naming convention that words returning booleans have names ending with " { $snippet "?" } "."
$nl
"Recall that a string is a palindrome if it is spelled the same forwards or backwards; that is, if the string is equal to its reverse. We can express this in Factor as follows:"
{ $code ": palindrome? ( string -- ? ) dup reverse = ;" }
"For example, we'd like it to identify the following as a palindrome:"
{ $code "\"A man, a plan, a canal: Panama.\"" }
"However, right now, the simplistic algorithm we use says this is not a palindrome:"
-{ $example "\"A man, a plan, a canal: Panama.\" palindrome?" "f" }
+{ $unchecked-example "\"A man, a plan, a canal: Panama.\" palindrome?" "f" }
"We would like it to output " { $link t } " there. We can encode this requirement with a unit test that we add to " { $snippet "palindrome-tests.factor" } ":"
{ $code "[ t ] [ \"A man, a plan, a canal: Panama.\" palindrome? ] unit-test" }
"If you now run unit tests, you will see a unit test failure:"
"Start by pushing a character on the stack; notice that characters are really just integers:"
{ $code "CHAR: a" }
"Now, use the " { $link Letter? } " word to test if it is an alphabetical character, upper or lower case:"
-{ $example "Letter? ." "t" }
+{ $unchecked-example "Letter? ." "t" }
"This gives the expected result."
$nl
"Now try with a non-alphabetical character:"
{ $code "CHAR: #" }
-{ $example "Letter? ." "f" }
+{ $unchecked-example "Letter? ." "f" }
"What we want to do is given a string, remove all characters which do not match the " { $link Letter? } " predicate. Let's push a string on the stack:"
{ $code "\"A man, a plan, a canal: Panama.\"" }
"Now, place a quotation containing " { $link Letter? } " on the stack; quoting code places it on the stack instead of executing it immediately:"
{ $description "Renders an HTML component to the " { $link output-stream } "." } ;
HELP: render*
-{ $values { "value" "a value" } { "name" "a value name" } { "renderer" "a component renderer" } }
-{ $contract "Renders an HTML component to the " { $link output-stream } "." } ;
+{ $values { "value" "a value" } { "name" "a value name" } { "renderer" "a component renderer" } { "xml" "an XML chunk" } }
+{ $contract "Renders an HTML component, outputting an XHTML snippet." } ;
ARTICLE: "html.components" "HTML components"
"The " { $vocab-link "html.components" } " vocabulary provides various HTML form components."
] with-string-writer
] unit-test
-[ "<input type='hidden' name='red' value='<jimmy>'/>" ] [
+[ "<input value=\"<jimmy>\" name=\"red\" type=\"hidden\"/>" ] [
[
"red" hidden render
] with-string-writer
[ ] [ "'jimmy'" "red" set-value ] unit-test
-[ "<input type='text' size='5' name='red' value=''jimmy''/>" ] [
+[ "<input value=\"'jimmy'\" name=\"red\" size=\"5\" type=\"text\"/>" ] [
[
"red" <field> 5 >>size render
] with-string-writer
] unit-test
-[ "<input type='password' size='5' name='red' value=''/>" ] [
+[ "<input value=\"\" name=\"red\" size=\"5\" type=\"password\"/>" ] [
[
"red" <password> 5 >>size render
] with-string-writer
[ ] [ t "delivery" set-value ] unit-test
-[ "<input type='checkbox' name='delivery' checked='true'>Delivery</input>" ] [
+[ "<input type=\"checkbox\" checked=\"true\" name=\"delivery\">Delivery</input>" ] [
[
"delivery"
<checkbox>
[ ] [ f "delivery" set-value ] unit-test
-[ "<input type='checkbox' name='delivery'>Delivery</input>" ] [
+[ "<input type=\"checkbox\" name=\"delivery\">Delivery</input>" ] [
[
"delivery"
<checkbox>
[ ] [ link-test "link" set-value ] unit-test
-[ "<a href='http://www.apple.com/foo&bar'><Link Title></a>" ] [
+[ "<a href=\"http://www.apple.com/foo&bar\"><Link Title></a>" ] [
[ "link" link new render ] with-string-writer
] unit-test
[ ] [ "java" "mode" set-value ] unit-test
-[ "<span class='KEYWORD3'>int</span> x <span class='OPERATOR'>=</span> <span class='DIGIT'>4</span>;\n" ] [
+[ "<span class=\"KEYWORD3\">int</span> x <span class=\"OPERATOR\">=</span> <span class=\"DIGIT\">4</span>;" ] [
[ "code" <code> "mode" >>mode render ] with-string-writer
] unit-test
[ t ] [
[ "object" inspector render ] with-string-writer
+ USING: splitting sequences ;
+ "\"" split "'" join ! replace " with ' for now
[ "object" value [ describe ] with-html-writer ] with-string-writer
=
] unit-test
USING: accessors kernel namespaces io math.parser assocs classes
classes.tuple words arrays sequences splitting mirrors
hashtables combinators continuations math strings inspector
-fry locals calendar calendar.format xml.entities
-validators urls present
-xmode.code2html lcs.diff2html farkup
+fry locals calendar calendar.format xml.entities xml.data
+validators urls present xml.writer xml.interpolate xml
+xmode.code2html lcs.diff2html farkup io.streams.string
html.elements html.streams html.forms ;
IN: html.components
-GENERIC: render* ( value name renderer -- )
+GENERIC: render* ( value name renderer -- xml )
: render ( name renderer -- )
prepare-value
[ f swap ]
if
] 2dip
- render*
+ render* write-xml-chunk
[ render-error ] when* ;
<PRIVATE
-: render-input ( value name type -- )
- <input =type =name present =value input/> ;
+: render-input ( value name type -- xml )
+ [XML <input value=<-> name=<-> type=<->/> XML] ;
PRIVATE>
SINGLETON: label
-M: label render* 2drop present escape-string write ;
+M: label render*
+ 2drop present ;
SINGLETON: hidden
-M: hidden render* drop "hidden" render-input ;
+M: hidden render*
+ drop "hidden" render-input ;
-: render-field ( value name size type -- )
- <input
- =type
- [ present =size ] when*
- =name
- present =value
- input/> ;
+: render-field ( value name size type -- xml )
+ [XML <input value=<-> name=<-> size=<-> type=<->/> XML] ;
TUPLE: field size ;
: <field> ( -- field )
field new ;
-M: field render* size>> "text" render-field ;
+M: field render*
+ size>> "text" render-field ;
TUPLE: password size ;
: <textarea> ( -- renderer )
textarea new ;
-M: textarea render*
- <textarea
- [ rows>> [ present =rows ] when* ]
- [ cols>> [ present =cols ] when* ] bi
- =name
- textarea>
- present escape-string write
- </textarea> ;
+M:: textarea render* ( value name area -- xml )
+ area rows>> :> rows
+ area cols>> :> cols
+ [XML
+ <textarea
+ name=<-name->
+ rows=<-rows->
+ cols=<-cols->><-value-></textarea>
+ XML] ;
! Choice
TUPLE: choice size multiple choices ;
: <choice> ( -- choice )
choice new ;
-: render-option ( text selected? -- )
- <option [ "selected" =selected ] when option>
- present escape-string write
- </option> ;
-
-: render-options ( options selected -- )
- '[ dup _ member? render-option ] each ;
-
-M: choice render*
- <select
- swap =name
- dup size>> [ present =size ] when*
- dup multiple>> [ "true" =multiple ] when
- select>
- [ choices>> value ] [ multiple>> ] bi
- [ swap ] [ swap 1array ] if
- render-options
- </select> ;
+: render-option ( text selected? -- xml )
+ "selected" and swap
+ [XML <option selected=<->><-></option> XML] ;
+
+: render-options ( value choice -- xml )
+ [ choices>> value ] [ multiple>> ] bi
+ [ swap ] [ swap 1array ] if
+ '[ dup _ member? render-option ] map ;
+
+M:: choice render* ( value name choice -- xml )
+ choice size>> :> size
+ choice multiple>> "true" and :> multiple
+ value choice render-options :> contents
+ [XML <select
+ name=<-name->
+ size=<-size->
+ multiple=<-multiple->><-contents-></select> XML] ;
! Checkboxes
TUPLE: checkbox label ;
checkbox new ;
M: checkbox render*
- <input
- "checkbox" =type
- swap =name
- swap [ "true" =checked ] when
- input>
- label>> escape-string write
- </input> ;
+ [ "true" and ] [ ] [ label>> ] tri*
+ [XML <input
+ type="checkbox"
+ checked=<-> name=<->><-></input> XML] ;
! Link components
GENERIC: link-title ( obj -- string )
TUPLE: link target ;
M: link render*
- nip
- <a target>> [ =target ] when* dup link-href =href a>
- link-title present escape-string write
- </a> ;
+ nip swap
+ [ target>> ] [ [ link-href ] [ link-title ] bi ] bi*
+ [XML <a target=<-> href=<->><-></a> XML] ;
! XMode code component
TUPLE: code mode ;
nip
[ no-follow>> [ string>boolean link-no-follow? set ] when* ]
[ disable-images>> [ string>boolean disable-images? set ] when* ]
- [ parsed>> string>boolean [ (write-farkup) ] [ write-farkup ] if ]
+ [ parsed>> string>boolean [ (write-farkup) ] [ farkup>xml ] if ]
tri
] with-scope ;
SINGLETON: inspector
M: inspector render*
- 2drop [ describe ] with-html-writer ;
+ 2drop [
+ [ describe ] with-html-writer
+ ] with-string-writer <unescaped> ;
! Diff component
SINGLETON: comparison
! HTML component
SINGLETON: html
-M: html render* 2drop write ;
+M: html render* 2drop string>xml-chunk ;
{ $code "<a =href a> \"Click me\" write </a>" }
{ $code "<a \"http://\" prepend =href a> \"click\" write </a>" }
{ $code "<a [ \"http://\" % % ] \"\" make =href a> \"click\" write </a>" }
-"Tags that have no ``closing'' equivalent have a trailing " { $snippet "tag/>" } " form:"
+"Tags that have no “closing” equivalent have a trailing " { $snippet "tag/>" } " form:"
{ $code "<input \"text\" =type \"name\" =name 20 =size input/>" }
"For the full list of HTML tags and attributes, consult the word list for the " { $vocab-link "html.elements" } " vocabulary. In addition to HTML tag and attribute words, a few utilities are provided."
$nl
"true" "b" set-value
] unit-test
-[ "<input type='checkbox' name='a'>a</input><input type='checkbox' name='b' checked='true'>b</input>" ] [
+[ "<input type=\"checkbox\" name=\"a\">a</input><input type=\"checkbox\" checked=\"true\" name=\"b\">b</input>" ] [
[
"test12" test-template call-template
] run-template
USING: http help.markup help.syntax io.pathnames io.streams.string
io.encodings.8-bit io.encodings.binary kernel strings urls
-urls.encoding byte-arrays strings assocs sequences ;
+urls.encoding byte-arrays strings assocs sequences destructors ;
IN: http.client
HELP: download-failed
HELP: http-post
{ $values { "post-data" object } { "url" "a " { $link url } " or " { $link string } } { "response" response } { "data" sequence } }
-{ $description "Submits a form at a URL." }
+{ $description "Submits an HTTP POST request." }
+{ $errors "Throws an error if the HTTP request fails." } ;
+
+HELP: http-put
+{ $values { "post-data" object } { "url" "a " { $link url } " or " { $link string } } { "response" response } { "data" sequence } }
+{ $description "Submits an HTTP PUT request." }
{ $errors "Throws an error if the HTTP request fails." } ;
HELP: with-http-get
{ $subsection with-http-get }
{ $subsection with-http-request } ;
-ARTICLE: "http.client.post" "POST requests with the HTTP client"
-"As with GET requests, there is a high-level word which takes a " { $link url } " and a lower-level word which constructs an HTTP request object which can be passed to " { $link http-request } ":"
-{ $subsection http-post }
-{ $subsection <post-request> }
-"Both words take a post data parameter, which can be one of the following:"
+ARTICLE: "http.client.post-data" "HTTP client submission data"
+"HTTP POST and PUT request words take a post data parameter, which can be one of the following:"
{ $list
- { "a " { $link byte-array } " or " { $link string } " is sent the server without further encoding" }
- { "an " { $link assoc } " is interpreted as a series of form parameters, which are encoded with " { $link assoc>query } }
+ { "a " { $link byte-array } ": the data is sent the server without further encoding" }
+ { "a " { $link string } ": the data is encoded and then sent as a series of bytes" }
+ { "an " { $link assoc } ": the assoc is interpreted as a series of form parameters, which are encoded with " { $link assoc>query } }
+ { "an input stream: the contents of the input stream are transmitted to the server without being read entirely into memory - this is useful for large requests" }
{ { $link f } " denotes that there is no post data" }
+ { "a " { $link post-data } " tuple, for additional control" }
+}
+"When passing a stream, you must ensure the stream is closed afterwards. The best way is to use " { $link with-disposal } " or " { $link "destructors" } ". For example,"
+{ $code
+ "\"my-large-post-request.txt\" ascii <file-reader>"
+ "[ URL\" http://www.my-company.com/web-service\" http-post ] with-disposal"
} ;
+ARTICLE: "http.client.post" "POST requests with the HTTP client"
+"Basic usage involves passing post data and a " { $link url } ", and getting a " { $link response } " and data back:"
+{ $subsection http-post }
+"Advanced usage involves constructing a " { $link request } ", which allows " { $link "http.cookies" } " and " { $link "http.headers" } " to be set:"
+{ $subsection <post-request> }
+"Both words take a post data parameter; see " { $link "http.client.post-data" } "." ;
+
+ARTICLE: "http.client.put" "PUT requests with the HTTP client"
+"Basic usage involves passing post data and a " { $link url } ", and getting a " { $link response } " and data back:"
+{ $subsection http-post }
+"Advanced usage involves constructing a " { $link request } ", which allows " { $link "http.cookies" } " and " { $link "http.headers" } " to be set:"
+{ $subsection <post-request> }
+"Both words take a post data parameter; see " { $link "http.client.post-data" } "." ;
+
ARTICLE: "http.client.encoding" "Character encodings and the HTTP client"
"The " { $link http-request } ", " { $link http-get } " and " { $link http-post } " words output a sequence containing data that was sent by the server."
$nl
ARTICLE: "http.client" "HTTP client"
"The " { $vocab-link "http.client" } " vocabulary implements an HTTP and HTTPS client on top of " { $link "http" } "."
$nl
-"For HTTPS support, you must load the " { $vocab-link "urls.secure" } " vocab first. If you don't load it, HTTPS will not load and images generated by " { $vocab-link "tools.deploy" } " will be smaller as a result."
+"For HTTPS support, you must load the " { $vocab-link "urls.secure" } " vocab first. If you don't need HTTPS support, don't load " { $vocab-link "urls.secure" } "; this will reduce the size of images generated by " { $vocab-link "tools.deploy" } "."
$nl
"There are two primary usage patterns, data retrieval with GET requests and form submission with POST requests:"
{ $subsection "http.client.get" }
{ $subsection "http.client.post" }
+{ $subsection "http.client.put" }
+"Submission data for POST and PUT requests:"
+{ $subsection "http.client.post-data" }
"More esoteric use-cases, for example HTTP methods other than the above, are accomodated by constructing an empty request object with " { $link <request> } " and filling everything in by hand."
{ $subsection "http.client.encoding" }
{ $subsection "http.client.errors" }
-! Copyright (C) 2005, 2008 Slava Pestov.
+! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs kernel math math.parser namespaces make
sequences strings splitting calendar continuations accessors vectors
io.pathnames io.encodings io.encodings.string io.encodings.ascii
io.encodings.utf8 io.encodings.8-bit io.encodings.binary
io.streams.duplex fry ascii urls urls.encoding present
-http http.parsers ;
+http http.parsers http.client.post-data ;
IN: http.client
+ERROR: too-many-redirects ;
+
+CONSTANT: max-redirects 10
+
+<PRIVATE
+
: write-request-line ( request -- request )
dup
[ method>> write bl ]
[ host>> ] [ port>> ] bi dup "http" protocol-port =
[ drop ] [ ":" swap number>string 3append ] if ;
+: set-host-header ( request header -- request header )
+ over url>> url-host "host" pick set-at ;
+
+: set-cookie-header ( header cookies -- header )
+ unparse-cookie "cookie" pick set-at ;
+
: write-request-header ( request -- request )
dup header>> >hashtable
- over url>> host>> [ over url>> url-host "host" pick set-at ] when
- over post-data>> [
- [ raw>> length "content-length" pick set-at ]
- [ content-type>> "content-type" pick set-at ]
- bi
- ] when*
- over cookies>> [ unparse-cookie "cookie" pick set-at ] unless-empty
+ over url>> host>> [ set-host-header ] when
+ over post-data>> [ set-post-data-headers ] when*
+ over cookies>> [ set-cookie-header ] unless-empty
write-header ;
-GENERIC: >post-data ( object -- post-data )
-
-M: post-data >post-data ;
-
-M: string >post-data utf8 encode "application/octet-stream" <post-data> ;
-
-M: byte-array >post-data "application/octet-stream" <post-data> ;
-
-M: assoc >post-data assoc>query ascii encode "application/x-www-form-urlencoded" <post-data> ;
-
-M: f >post-data ;
-
-: unparse-post-data ( request -- request )
- [ >post-data ] change-post-data ;
-
-: write-post-data ( request -- request )
- dup method>> [ "POST" = ] [ "PUT" = ] bi or [ dup post-data>> raw>> write ] when ;
-
: write-request ( request -- )
unparse-post-data
write-request-line
read-response-line
read-response-header ;
-: max-redirects 10 ;
-
-ERROR: too-many-redirects ;
-
-<PRIVATE
-
DEFER: (with-http-request)
SYMBOL: redirects
read-crlf B{ } assert= read-chunked
] if ; inline recursive
-: read-unchunked ( quot: ( chunk -- ) -- )
- 8192 read-partial dup [
- [ swap call ] [ drop read-unchunked ] 2bi
- ] [ 2drop ] if ; inline recursive
-
: read-response-body ( quot response -- )
binary decode-input
"transfer-encoding" header "chunked" =
- [ read-chunked ] [ read-unchunked ] if ; inline
+ [ read-chunked ] [ each-block ] if ; inline
: <request-socket> ( -- stream )
request get url>> url-addr ascii <client> drop
[ do-redirect ] [ nip ] if
] with-variable ; inline recursive
+: <client-request> ( url method -- request )
+ <request>
+ swap >>method
+ swap >url ensure-port >>url ; inline
+
PRIVATE>
: success? ( code -- ? ) 200 299 between? ;
dup code>> success? [ download-failed ] unless ;
: with-http-request ( request quot -- response )
- (with-http-request) check-response ; inline
+ [ (with-http-request) check-response ] with-destructors ; inline
: http-request ( request -- response data )
[ [ % ] with-http-request ] B{ } make
over content-charset>> decode ;
: <get-request> ( url -- request )
- <request>
- "GET" >>method
- swap >url ensure-port >>url ;
+ "GET" <client-request> ;
: http-get ( url -- response data )
<get-request> http-request ;
dup download-name download-to ;
: <post-request> ( post-data url -- request )
- <request>
- "POST" >>method
- swap >url ensure-port >>url
+ "POST" <client-request>
swap >>post-data ;
: http-post ( post-data url -- response data )
<post-request> http-request ;
+: <put-request> ( post-data url -- request )
+ "PUT" <client-request>
+ swap >>post-data ;
+
+: http-put ( post-data url -- response data )
+ <put-request> http-request ;
+
USING: vocabs vocabs.loader ;
"debugger" vocab [ "http.client.debugger" require ] when
--- /dev/null
+Slava Pestov
\ No newline at end of file
--- /dev/null
+! Copyright (C) 2009 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: tools.test http.client.post-data ;
+IN: http.client.post-data.tests
--- /dev/null
+! Copyright (C) 2009 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors assocs destructors http io io.encodings.ascii
+io.encodings.binary io.encodings.string io.encodings.utf8
+io.files io.files.info io.pathnames kernel math.parser
+namespaces sequences strings urls.encoding ;
+IN: http.client.post-data
+
+TUPLE: measured-stream stream size ;
+
+C: <measured-stream> measured-stream
+
+<PRIVATE
+
+GENERIC: (set-post-data-headers) ( header data -- header )
+
+M: sequence (set-post-data-headers)
+ length "content-length" pick set-at ;
+
+M: measured-stream (set-post-data-headers)
+ size>> "content-length" pick set-at ;
+
+M: object (set-post-data-headers)
+ drop "chunked" "transfer-encoding" pick set-at ;
+
+PRIVATE>
+
+: set-post-data-headers ( header post-data -- header )
+ [ data>> (set-post-data-headers) ]
+ [ content-type>> "content-type" pick set-at ] bi ;
+
+<PRIVATE
+
+GENERIC: (write-post-data) ( data -- )
+
+M: sequence (write-post-data) write ;
+
+M: measured-stream (write-post-data)
+ stream>> [ [ write ] each-block ] with-input-stream ;
+
+: write-chunk ( chunk -- )
+ [ length >hex ";\r\n" append ascii encode write ] [ write ] bi ;
+
+M: object (write-post-data)
+ [ [ write-chunk ] each-block ] with-input-stream
+ "0;\r\n" ascii encode write ;
+
+GENERIC: >post-data ( object -- post-data )
+
+M: f >post-data ;
+
+M: post-data >post-data ;
+
+M: string >post-data
+ utf8 encode
+ "application/octet-stream" <post-data>
+ swap >>data ;
+
+M: assoc >post-data
+ "application/x-www-form-urlencoded" <post-data>
+ swap >>params ;
+
+M: object >post-data
+ "application/octet-stream" <post-data>
+ swap >>data ;
+
+: pathname>measured-stream ( pathname -- stream )
+ string>>
+ [ binary <file-reader> &dispose ]
+ [ file-info size>> ] bi
+ <measured-stream> ;
+
+: normalize-post-data ( request -- request )
+ dup post-data>> [
+ dup params>> [
+ assoc>query ascii encode >>data
+ ] when*
+ dup data>> pathname? [
+ [ pathname>measured-stream ] change-data
+ ] when
+ drop
+ ] when* ;
+
+PRIVATE>
+
+: unparse-post-data ( request -- request )
+ [ >post-data ] change-post-data
+ normalize-post-data ;
+
+: write-post-data ( request -- request )
+ dup post-data>> [ data>> (write-post-data) ] when* ;
{ $table
{ { $slot "version" } { "The HTTP version. Default is " { $snippet "1.1" } " and should not be changed without good reason." } }
{ { $slot "code" } { "HTTP status code, an " { $link integer } ". Examples are 200 for success, 404 for file not found, and so on." } }
- { { $slot "message" } { "HTTP status message, only displayed to the user. If the status code is 200, the status message might be ``Success'', for example." } }
+ { { $slot "message" } { "HTTP status message, only displayed to the user. If the status code is 200, the status message might be “Success”, for example." } }
{ { $slot "header" } { "An assoc of HTTP header values. See " { $link "http.headers" } } }
{ { $slot "cookies" } { "A sequence of HTTP cookies. See " { $link "http.cookies" } } }
{ { $slot "content-type" } { "an HTTP content type" } }
{ $table
{ { $slot "version" } { "The HTTP version. Default is " { $snippet "1.1" } " and should not be changed without good reason." } }
{ { $slot "code" } { "HTTP status code, an " { $link integer } ". Examples are 200 for success, 404 for file not found, and so on." } }
- { { $slot "message" } { "HTTP status message, only displayed to the user. If the status code is 200, the status message might be ``Success'', for example." } }
+ { { $slot "message" } { "HTTP status message, only displayed to the user. If the status code is 200, the status message might be “Success”, for example." } }
{ { $slot "body" } { "an HTTP response body" } }
} } ;
{ $side-effects "request/response" } ;
HELP: <post-data>
-{ $values { "raw" byte-array } { "content-type" "a MIME type string" } { "post-data" post-data } }
+{ $values { "content-type" "a MIME type string" } { "post-data" post-data } }
{ $description "Creates a new " { $link post-data } "." } ;
HELP: header
HELP: set-header
{ $values { "request/response" "a " { $link request } " or a " { $link response } } { "value" object } { "key" string } }
{ $description "Stores a value into the HTTP header of a request or response. The value can be any object supported by " { $link present } "." }
-{ $notes "This word always returns the same object that was input. This allows for a ``pipeline'' coding style, where several header parameters are set in a row." }
+{ $notes "This word always returns the same object that was input. This allows for a “pipeline” coding style, where several header parameters are set in a row." }
{ $side-effects "request/response" } ;
ARTICLE: "http.cookies" "HTTP cookies"
-USING: http http.server http.client tools.test multiline
+USING: http http.server http.client http.client.private tools.test multiline
io.streams.string io.encodings.utf8 io.encodings.8-bit
io.encodings.binary io.encodings.string kernel arrays splitting
sequences assocs io.sockets db db.sqlite continuations urls
{ method "POST" }
{ version "1.1" }
{ header H{ { "some-header" "1; 2" } { "content-length" "4" } { "content-type" "application/octet-stream" } } }
- { post-data T{ post-data { content "blah" } { raw "blah" } { content-type "application/octet-stream" } } }
+ { post-data T{ post-data { data "blah" } { content-type "application/octet-stream" } } }
{ cookies V{ } }
}
] [
raw-response new
"1.1" >>version ;
-TUPLE: post-data raw content content-type form-variables uploaded-files ;
+TUPLE: post-data data params content-type content-encoding ;
-: <post-data> ( form-variables uploaded-files raw content-type -- post-data )
+: <post-data> ( content-type -- post-data )
post-data new
- swap >>content-type
- swap >>raw
- swap >>uploaded-files
- swap >>form-variables ;
+ swap >>content-type ;
: parse-content-type-attributes ( string -- attributes )
" " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ;
request get "accept" header "HTTP_ACCEPT" set\r
\r
post-request? [\r
- request get post-data>> raw>>\r
+ request get post-data>> data>>\r
[ "CONTENT_TYPE" set ]\r
[ length number>string "CONTENT_LENGTH" set ]\r
bi\r
swap '[\r
binary encode-output\r
_ output-stream get swap <cgi-process> binary <process-stream> [\r
- post-request? [ request get post-data>> raw>> write flush ] when\r
- input-stream get swap (stream-copy)\r
+ post-request? [ request get post-data>> data>> write flush ] when\r
+ '[ _ write ] each-block\r
] with-stream\r
] >>body ;\r
\r
}
"In the above example, visiting any URL other than " { $snippet "/new" } ", " { $snippet "/edit" } ", " { $snippet "/delete" } ", or " { $snippet "/" } " will result in a 404 error."
{ $heading "Another pathname dispatcher" }
-"On the other hand, suppose we wanted to route all unrecognized paths to a ``view'' action:"
+"On the other hand, suppose we wanted to route all unrecognized paths to a “view” action:"
{ $code
<" <dispatcher>
<new-action> "new" add-responder
html.streams ;
IN: http.server
-\ parse-cookie DEBUG add-input-logging
-
: check-absolute ( url -- url )
dup path>> "/" head? [ "Bad request: URL" throw ] unless ; inline
";" split1 nip
"=" split1 nip [ no-boundary ] unless* ;
-: read-multipart-data ( request -- form-variables uploaded-files )
+: read-multipart-data ( request -- mime-parts )
[ "content-type" header ]
[ "content-length" header string>number ] bi
unlimit-input
: read-content ( request -- bytes )
"content-length" header string>number read ;
-: parse-content ( request content-type -- form-variables uploaded-files raw )
- {
- { "multipart/form-data" [ read-multipart-data f ] }
- { "application/x-www-form-urlencoded" [ read-content [ f f ] dip ] }
- [ drop read-content [ f f ] dip ]
+: parse-content ( request content-type -- post-data )
+ [ <post-data> swap ] keep {
+ { "multipart/form-data" [ read-multipart-data >>params ] }
+ { "application/x-www-form-urlencoded" [ read-content query>assoc >>params ] }
+ [ drop read-content >>data ]
} case ;
: read-post-data ( request -- request )
dup method>> "POST" = [
dup dup "content-type" header
- [ ";" split1 drop parse-content ] keep
- <post-data> >>post-data
+ ";" split1 drop parse-content >>post-data
] when ;
: extract-host ( request -- request )
LOG: httpd-header NOTICE
-: log-header ( headers name -- )
- tuck header 2array httpd-header ;
+: log-header ( request name -- )
+ [ nip ] [ header ] 2bi 2array httpd-header ;
: log-request ( request -- )
[ [ method>> ] [ url>> ] bi 2array httpd-hit ]
-! Copyright (C) 2008 Slava Pestov.
+! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: io kernel macros make multiline namespaces parser
present sequences strings splitting fry accessors ;
IN: interpolate
+<PRIVATE
+
TUPLE: interpolate-var name ;
: (parse-interpolate) ( string -- )
: parse-interpolate ( string -- seq )
[ (parse-interpolate) ] { } make ;
-MACRO: interpolate ( string -- )
- parse-interpolate [
+: (interpolate) ( string quot -- quot' )
+ [ parse-interpolate ] dip '[
dup interpolate-var?
- [ name>> '[ _ get present write ] ]
+ [ name>> @ '[ _ @ present write ] ]
[ '[ _ write ] ]
if
- ] map [ ] join ;
+ ] map [ ] join ; inline
+
+PRIVATE>
+
+MACRO: interpolate ( string -- )
+ [ [ get ] ] (interpolate) ;
: interpolate-locals ( string -- quot )
- parse-interpolate [
- dup interpolate-var?
- [ name>> search '[ _ present write ] ]
- [ '[ _ write ] ]
- if
- ] map [ ] join ;
+ [ search [ ] ] (interpolate) ;
-: I[ "]I" parse-multiline-string
- interpolate-locals parsed \ call parsed ; parsing
+: I[
+ "]I" parse-multiline-string
+ interpolate-locals over push-all ; parsing
{ $description "From a specification, produce an interval tree. The specification is an assoc where the keys are intervals, or pairs of numbers to represent intervals, or individual numbers to represent singleton intervals. The values are the values in the interval map. Construction time is O(n log n)." } ;
\r
ARTICLE: "interval-maps" "Interval maps"\r
-"Interval maps are a mechanism, similar to assocs, where a set of closed intervals of keys are associated with values. As such, interval maps do not conform to the assoc protocol, because intervals of floats, for example, can be used, and it is impossible to get a list of keys in between."\r
+"The " { $vocab-link "interval-maps" } " vocabulary implements a data structure, similar to assocs, where a set of closed intervals of keys are associated with values. As such, interval maps do not conform to the assoc protocol, because intervals of floats, for example, can be used, and it is impossible to get a list of keys in between."\r
+$nl\r
"The following operations are used to query interval maps:"\r
{ $subsection interval-at* }\r
{ $subsection interval-at }\r
\r
: interval-at* ( key map -- value ? )\r
[ drop ] [ array>> find-interval ] 2bi\r
- tuck interval-contains? [ third t ] [ drop f f ] if ;\r
+ [ nip ] [ interval-contains? ] 2bi\r
+ [ third t ] [ drop f f ] if ;\r
\r
: interval-at ( key map -- value ) interval-at* drop ;\r
\r
: default-security-attributes ( -- obj )
"SECURITY_ATTRIBUTES" <c-object>
"SECURITY_ATTRIBUTES" heap-size
- over set-SECURITY_ATTRIBUTES-nLength ;
+ over set-SECURITY_ATTRIBUTES-nLength ;
\ No newline at end of file
HELP: cwd
{ $values { "path" "a pathname string" } }
{ $description "Outputs the current working directory of the Factor process." }
-{ $errors "Windows CE has no concept of ``current directory'', so this word throws an error there." }
+{ $errors "Windows CE has no concept of “current directory”, so this word throws an error there." }
{ $notes "User code should use the value of the " { $link current-directory } " variable instead." } ;
HELP: cd
{ $values { "path" "a pathname string" } }
{ $description "Changes the current working directory of the Factor process." }
-{ $errors "Windows CE has no concept of ``current directory'', so this word throws an error there." }
+{ $errors "Windows CE has no concept of “current directory”, so this word throws an error there." }
{ $notes "User code should use " { $link with-directory } " or " { $link set-current-directory } " instead." } ;
{ cd cwd current-directory set-current-directory with-directory } related-words
"This variable can be changed with a pair of words:"
{ $subsection set-current-directory }
{ $subsection with-directory }
-"This variable is independent of the operating system notion of ``current working directory''. While all Factor I/O operations use the variable and not the operating system's value, care must be taken when making FFI calls which expect a pathname. The first option is to resolve relative paths:"
+"This variable is independent of the operating system notion of “current working directory”. While all Factor I/O operations use the variable and not the operating system's value, care must be taken when making FFI calls which expect a pathname. The first option is to resolve relative paths:"
{ $subsection (normalize-path) }
"The second is to change the working directory of the current process:"
{ $subsection cd }
{ $subsection "current-directory" }
{ $subsection "io.directories.listing" }
{ $subsection "io.directories.create" }
-{ $subsection "delete-move-copy" } ;
+{ $subsection "delete-move-copy" }
+{ $subsection "io.directories.hierarchy" } ;
ABOUT: "io.directories"
[ t ] [
[
- 10 [ "io.paths.test" "gogogo" make-unique-file* ] replicate
- current-directory get t [ ] find-all-files
- ] with-unique-directory
- [ natural-sort ] bi@ =
+ 10 [ "io.paths.test" "gogogo" make-unique-file ] replicate
+ current-temporary-directory get t [ ] find-all-files
+ ] with-unique-directory drop [ natural-sort ] bi@ =
] unit-test
RemoveDirectory win32-error=0/f ;
: find-first-file ( path -- WIN32_FIND_DATA handle )
- "WIN32_FIND_DATA" <c-object> tuck
- FindFirstFile
+ "WIN32_FIND_DATA" <c-object>
+ [ nip ] [ FindFirstFile ] 2bi
[ INVALID_HANDLE_VALUE = [ win32-error-string throw ] when ] keep ;
: find-next-file ( path -- WIN32_FIND_DATA/f )
- "WIN32_FIND_DATA" <c-object> tuck
- FindNextFile 0 = [
+ "WIN32_FIND_DATA" <c-object>
+ [ nip ] [ FindNextFile ] 2bi 0 = [
GetLastError ERROR_NO_MORE_FILES = [
win32-error
] unless drop f
strings ;
IN: io.encodings.8-bit
-ARTICLE: "io.encodings.8-bit" "8-bit encodings"
+ARTICLE: "io.encodings.8-bit" "Legacy 8-bit encodings"
"Many encodings are a simple mapping of bytes onto characters. The " { $vocab-link "io.encodings.8-bit" } " vocabulary implements these generically using existing resource files. These encodings should be used with extreme caution, as fully general Unicode encodings like UTF-8 are nearly always more appropriate. The following 8-bit encodings are already defined:"
{ $subsection latin1 }
{ $subsection latin2 }
: decode-if< ( stream encoding max -- character )
nip swap stream-read1 dup
- [ tuck > [ >fixnum ] [ drop replacement-char ] if ] [ 2drop f ] if ; inline
+ [ [ nip ] [ > ] 2bi [ >fixnum ] [ drop replacement-char ] if ]
+ [ 2drop f ] if ; inline
PRIVATE>
SINGLETON: ascii
M: freebsd new-file-system-info freebsd-file-system-info new ;
M: freebsd file-system-statfs ( path -- byte-array )
- "statfs" <c-object> tuck statfs io-error ;
+ "statfs" <c-object> [ statfs io-error ] keep ;
M: freebsd statfs>file-system-info ( file-system-info statvfs -- file-system-info )
{
} cleave ;
M: freebsd file-system-statvfs ( path -- byte-array )
- "statvfs" <c-object> tuck statvfs io-error ;
+ "statvfs" <c-object> [ statvfs io-error ] keep ;
M: freebsd statvfs>file-system-info ( file-system-info statvfs -- file-system-info )
{
M: linux new-file-system-info linux-file-system-info new ;
M: linux file-system-statfs ( path -- byte-array )
- "statfs64" <c-object> tuck statfs64 io-error ;
+ "statfs64" <c-object> [ statfs64 io-error ] keep ;
M: linux statfs>file-system-info ( struct -- statfs )
{
} cleave ;
M: linux file-system-statvfs ( path -- byte-array )
- "statvfs64" <c-object> tuck statvfs64 io-error ;
+ "statvfs64" <c-object> [ statvfs64 io-error ] keep ;
M: linux statvfs>file-system-info ( struct -- statfs )
{
M: macosx new-file-system-info macosx-file-system-info new ;
M: macosx file-system-statfs ( normalized-path -- statfs )
- "statfs64" <c-object> tuck statfs64 io-error ;
+ "statfs64" <c-object> [ statfs64 io-error ] keep ;
M: macosx file-system-statvfs ( normalized-path -- statvfs )
- "statvfs" <c-object> tuck statvfs io-error ;
+ "statvfs" <c-object> [ statvfs io-error ] keep ;
M: macosx statfs>file-system-info ( file-system-info byte-array -- file-system-info' )
{
M: netbsd new-file-system-info netbsd-file-system-info new ;
M: netbsd file-system-statvfs
- "statvfs" <c-object> tuck statvfs io-error ;
+ "statvfs" <c-object> [ statvfs io-error ] keep ;
M: netbsd statvfs>file-system-info ( file-system-info statvfs -- file-system-info' )
{
M: openbsd new-file-system-info freebsd-file-system-info new ;
M: openbsd file-system-statfs
- "statfs" <c-object> tuck statfs io-error ;
+ "statfs" <c-object> [ statfs io-error ] keep ;
M: openbsd statfs>file-system-info ( file-system-info statfs -- file-system-info' )
{
} cleave ;
M: openbsd file-system-statvfs ( normalized-path -- statvfs )
- "statvfs" <c-object> tuck statvfs io-error ;
+ "statvfs" <c-object> [ statvfs io-error ] keep ;
M: openbsd statvfs>file-system-info ( file-system-info statvfs -- file-system-info' )
{
[ t ] [
[
- 5 "lol" make-test-links
- "lol1" follow-links
- current-directory get "lol5" append-path =
- ] with-unique-directory
+ current-temporary-directory get [
+ 5 "lol" make-test-links
+ "lol1" follow-links
+ current-temporary-directory get "lol5" append-path =
+ ] with-directory
+ ] cleanup-unique-directory
] unit-test
[
[
- 100 "laf" make-test-links "laf1" follow-links
+ current-temporary-directory get [
+ 100 "laf" make-test-links "laf1" follow-links
+ ] with-directory
] with-unique-directory
] [ too-many-symlinks? ] must-fail-with
[ t ] [
110 symlink-depth [
[
- 100 "laf" make-test-links
- "laf1" follow-links
- current-directory get "laf100" append-path =
- ] with-unique-directory
+ current-temporary-directory get [
+ 100 "laf" make-test-links
+ "laf1" follow-links
+ current-temporary-directory get "laf100" append-path =
+ ] with-directory
+ ] cleanup-unique-directory
] with-variable
] unit-test
USING: help.markup help.syntax io io.ports kernel math
-io.pathnames io.directories math.parser io.files strings ;
+io.pathnames io.directories math.parser io.files strings
+quotations io.files.unique.private ;
IN: io.files.unique
-HELP: temporary-path
+HELP: default-temporary-directory
{ $values
{ "path" "a pathname string" }
}
HELP: make-unique-file ( prefix suffix -- path )
{ $values { "prefix" "a string" } { "suffix" "a string" }
{ "path" "a pathname string" } }
-{ $description "Creates a file that is guaranteed not to exist in a platform-specific temporary directory. The file name is composed of a prefix, a number of random digits and letters, and the suffix. Returns the full pathname." }
+{ $description "Creates a file that is guaranteed not to exist in the directory stored in " { $link current-temporary-directory } ". The file name is composed of a prefix, a number of random digits and letters, and the suffix. Returns the full pathname." }
{ $errors "Throws an error if a new unique file cannot be created after a number of tries. The most likely error is incorrect directory permissions on the temporary directory." } ;
-HELP: make-unique-file*
-{ $values
- { "prefix" string } { "suffix" string }
- { "path" "a pathname string" }
-}
-{ $description "Creates a file that is guaranteed not to exist in the directory in the " { $link current-directory } " variable. The file name is composed of a prefix, a number of random digits and letters, and the suffix. Returns the full pathname." } ;
+{ unique-file make-unique-file cleanup-unique-file } related-words
-{ make-unique-file make-unique-file* with-unique-file } related-words
-
-HELP: with-unique-file ( prefix suffix quot: ( path -- ) -- )
+HELP: cleanup-unique-file ( prefix suffix quot: ( path -- ) -- )
{ $values { "prefix" "a string" } { "suffix" "a string" }
{ "quot" "a quotation" } }
{ $description "Creates a file with " { $link make-unique-file } " and calls the quotation with the path name on the stack." }
{ $notes "The unique file will be deleted after calling this word." } ;
-HELP: make-unique-directory ( -- path )
+HELP: unique-directory ( -- path )
{ $values { "path" "a pathname string" } }
-{ $description "Creates a directory that is guaranteed not to exist in a platform-specific temporary directory and returns the full pathname." }
+{ $description "Creates a directory, guaranteed not to already exist, in the value of " { $link current-temporary-directory } " and returns the full pathname." }
{ $errors "Throws an error if the directory cannot be created after a number of tries. The most likely error is incorrect directory permissions on the temporary directory." } ;
-HELP: with-unique-directory ( quot -- )
+HELP: cleanup-unique-directory ( quot -- )
{ $values { "quot" "a quotation" } }
-{ $description "Creates a directory with " { $link make-unique-directory } " and calls the quotation with the pathname on the stack using the " { $link with-directory } " combinator. The quotation can access the " { $link current-directory } " symbol for the name of the temporary directory." }
-{ $notes "The directory will be deleted after calling this word, even if an error is thrown in the quotation." } ;
+{ $description "Creates a directory with " { $link unique-directory } " and calls the quotation with the pathname on the stack using the " { $link with-temporary-directory } " combinator. The quotation can access the " { $link current-temporary-directory } " symbol for the name of the temporary directory. Subsequent unique files will be created in this unique directory until the combinator returns." }
+{ $notes "The directory will be deleted after calling this word, even if an error is thrown in the quotation. This combinator is like " { $link with-unique-directory } " but does not delete the directory." } ;
+
+HELP: with-unique-directory
+{ $values
+ { "quot" quotation }
+ { "path" "a pathname string" }
+}
+{ $description "Creates a directory with " { $link unique-directory } " and calls the quotation with the pathname on the stack using the " { $link with-temporary-directory } " combinator. The quotation can access the " { $link current-temporary-directory } " symbol for the name of the temporary directory. Subsequent unique files will be created in this unique directory until the combinator returns." } ;
+
+HELP: current-temporary-directory
+{ $values
+ { "value" "a path" }
+}
+{ $description "The temporary directory used for creating unique files and directories." } ;
+
+HELP: unique-file
+{ $values
+ { "path" "a pathname string" }
+ { "path'" "a pathname string" }
+}
+{ $description "Creates a temporary file in the directory stored in " { $link current-temporary-directory } " and outputs the path name." } ;
+
+HELP: with-temporary-directory
+{ $values
+ { "path" "a pathname string" } { "quot" quotation }
+}
+{ $description "Sets " { $link current-temporary-directory } " to " { $snippet "path" } " and calls the quotation, restoring the previous temporary path after execution completes." } ;
-ARTICLE: "io.files.unique" "Temporary files"
-"The " { $vocab-link "io.files.unique" } " vocabulary implements cross-platform temporary file creation in a high-level and secure way." $nl
-"Creating temporary files:"
+ARTICLE: "io.files.unique" "Unique files"
+"The " { $vocab-link "io.files.unique" } " vocabulary implements cross-platform unique file creation in temporary directories in a high-level and secure way." $nl
+"Changing the temporary path:"
+{ $subsection current-temporary-directory }
+"Creating unique files:"
+{ $subsection unique-file }
+{ $subsection cleanup-unique-file }
{ $subsection make-unique-file }
-{ $subsection make-unique-file* }
-{ $subsection with-unique-file }
-"Creating temporary directories:"
-{ $subsection make-unique-directory }
-{ $subsection with-unique-directory } ;
+"Creating unique directories:"
+{ $subsection unique-directory }
+{ $subsection with-unique-directory }
+{ $subsection cleanup-unique-directory }
+"Default temporary directory:"
+{ $subsection default-temporary-directory } ;
ABOUT: "io.files.unique"
USING: io.encodings.ascii sequences strings io io.files accessors
tools.test kernel io.files.unique namespaces continuations
-io.files.info io.pathnames ;
+io.files.info io.pathnames io.directories ;
IN: io.files.unique.tests
[ 123 ] [
"core" ".test" [
[ [ 123 CHAR: a <repetition> ] dip ascii set-file-contents ]
[ file-info size>> ] bi
- ] with-unique-file
+ ] cleanup-unique-file
] unit-test
[ t ] [
- [ current-directory get file-info directory? ] with-unique-directory
+ [ current-directory get file-info directory? ] cleanup-unique-directory
] unit-test
[ t ] [
current-directory get
- [ [ "FAILDOG" throw ] with-unique-directory ] [ drop ] recover
+ [ [ "FAILDOG" throw ] cleanup-unique-directory ] [ drop ] recover
current-directory get =
] unit-test
+
+[ t ] [
+ [
+ "asdf" unique-file drop
+ "asdf2" unique-file drop
+ current-temporary-directory get directory-files length 2 =
+ ] cleanup-unique-directory
+] unit-test
+
+[ t ] [
+ [ ] with-unique-directory >boolean
+] unit-test
+
+[ t ] [
+ [
+ "asdf" unique-file drop
+ "asdf" unique-file drop
+ current-temporary-directory get directory-files length 2 =
+ ] with-unique-directory drop
+] unit-test
sequences system vocabs.loader ;
IN: io.files.unique
-HOOK: touch-unique-file io-backend ( path -- )
-HOOK: temporary-path io-backend ( -- path )
+HOOK: (touch-unique-file) io-backend ( path -- )
+: touch-unique-file ( path -- )
+ normalize-path (touch-unique-file) ;
+
+HOOK: default-temporary-directory io-backend ( -- path )
+
+SYMBOL: current-temporary-directory
SYMBOL: unique-length
SYMBOL: unique-retries
10 unique-length set-global
10 unique-retries set-global
+: with-temporary-directory ( path quot -- )
+ [ current-temporary-directory ] dip with-variable ; inline
+
<PRIVATE
: random-letter ( -- ch )
{ t f } random
[ 10 random CHAR: 0 + ] [ random-letter ] if ;
-: random-name ( n -- string )
- [ random-ch ] "" replicate-as ;
-
-PRIVATE>
+: random-name ( -- string )
+ unique-length get [ random-ch ] "" replicate-as ;
: (make-unique-file) ( path prefix suffix -- path )
'[
- _ _ _ unique-length get random-name glue append-path
+ _ _ _ random-name glue append-path
dup touch-unique-file
] unique-retries get retry ;
-: make-unique-file ( prefix suffix -- path )
- [ temporary-path ] 2dip (make-unique-file) ;
+PRIVATE>
-: make-unique-file* ( prefix suffix -- path )
- [ current-directory get ] 2dip (make-unique-file) ;
+: make-unique-file ( prefix suffix -- path )
+ [ current-temporary-directory get ] 2dip (make-unique-file) ;
-: with-unique-file ( prefix suffix quot: ( path -- ) -- )
+: cleanup-unique-file ( prefix suffix quot: ( path -- ) -- )
[ make-unique-file ] dip [ delete-file ] bi ; inline
-: make-unique-directory ( -- path )
+: unique-directory ( -- path )
[
- temporary-path unique-length get random-name append-path
+ current-temporary-directory get
+ random-name append-path
dup make-directory
] unique-retries get retry ;
-: with-unique-directory ( quot: ( -- ) -- )
- [ make-unique-directory ] dip
- '[ _ with-directory ] [ delete-tree ] bi ; inline
+: with-unique-directory ( quot -- path )
+ [ unique-directory ] dip
+ [ with-temporary-directory ] [ drop ] 2bi ; inline
+
+: cleanup-unique-directory ( quot: ( -- ) -- )
+ [ unique-directory ] dip
+ '[ _ with-temporary-directory ] [ delete-tree ] bi ; inline
+
+: unique-file ( path -- path' )
+ "" make-unique-file ;
{
{ [ os unix? ] [ "io.files.unique.unix" ] }
{ [ os windows? ] [ "io.files.unique.windows" ] }
} cond require
+
+default-temporary-directory current-temporary-directory set-global
: open-unique-flags ( -- flags )
{ O_RDWR O_CREAT O_EXCL } flags ;
-M: unix touch-unique-file ( path -- )
+M: unix (touch-unique-file) ( path -- )
open-unique-flags file-mode open-file close-file ;
-M: unix temporary-path ( -- path ) "/tmp" ;
+M: unix default-temporary-directory ( -- path ) "/tmp" ;
io.files.unique ;
IN: io.files.unique.windows
-M: windows touch-unique-file ( path -- )
+M: windows (touch-unique-file) ( path -- )
GENERIC_WRITE CREATE_NEW 0 open-file dispose ;
-M: windows temporary-path ( -- path )
+M: windows default-temporary-directory ( -- path )
"TEMP" os-env ;
[ ] [ "monitor-test-self" temp-file touch-file ] unit-test
[ t ] [
- "m" get next-change drop
+ "m" get next-change path>>
[ "" = ] [ "monitor-test-self" temp-file = ] bi or
] unit-test
[ ] [ "monitor-test-self" temp-file touch-file ] unit-test
[ t ] [
- "m" get next-change drop
+ "m" get next-change path>>
[ "" = ] [ "monitor-test-self" temp-file = ] bi or
] unit-test
{ $contract "Opens a file system change monitor which listens for changes on " { $snippet "path" } " and posts notifications to " { $snippet "mailbox" } " as triples with shape " { $snippet "{ path changed monitor } " } ". The boolean indicates whether changes in subdirectories should be reported." }\r
{ $errors "Throws an error if the pathname does not exist, if a monitor could not be created or if the platform does not support monitors." } ;\r
\r
+HELP: file-change\r
+{ $class-description "A change notification output by " { $link next-change } ". The " { $snippet "path" } " slot holds a pathname string. The " { $snippet "changed" } " slots holds a sequence of symbols documented in " { $link "io.monitors.descriptors" } "." } ;\r
+\r
HELP: next-change\r
-{ $values { "monitor" "a monitor" } { "path" "a pathname string" } { "changed" "a change descriptor" } }\r
-{ $contract "Waits for file system changes and outputs the pathname of the first changed file. The change descriptor is a sequence of symbols documented in " { $link "io.monitors.descriptors" } "." }\r
+{ $values { "monitor" "a monitor" } { "change" file-change } }\r
+{ $contract "Waits for file system changes and outputs a change descriptor for the first changed file." }\r
{ $errors "Throws an error if the monitor is closed from another thread." } ;\r
\r
HELP: with-monitor\r
{ $description "Indicates that a file has been renamed." } ;\r
\r
ARTICLE: "io.monitors.descriptors" "File system change descriptors"\r
-"Change descriptors output by " { $link next-change } ":"\r
+"The " { $link next-change } " word outputs instances of a class:"\r
+{ $subsection file-change }\r
+"The " { $slot "changed" } " slot holds a sequence which may contain any of the following symbols:"\r
{ $subsection +add-file+ }\r
{ $subsection +remove-file+ }\r
{ $subsection +modify-file+ }\r
{ $subsection +rename-file+ } ;\r
\r
ARTICLE: "io.monitors.platforms" "Monitors on different platforms"\r
-"Whether the " { $snippet "path" } " output value of " { $link next-change } " contains an absolute path or a path relative to the path given to " { $link <monitor> } " is unspecified, and may even vary on the same platform. User code should not assume either case."\r
+"Whether the " { $slot "path" } " slot of a " { $link file-change } " contains an absolute path or a path relative to the path given to " { $link <monitor> } " is unspecified, and may even vary on the same platform. User code should not assume either case."\r
$nl\r
"If the immediate path being monitored was changed, then " { $snippet "path" } " will equal " { $snippet "\"\"" } "; however this condition is not reported on all platforms. See below."\r
{ $heading "Mac OS X" }\r
$nl\r
{ $snippet "FSEventStream" } "s always monitor directory hierarchies recursively, and the " { $snippet "recursive?" } " parameter to " { $link <monitor> } " has no effect."\r
$nl\r
-"The " { $snippet "changed" } " output value of the " { $link next-change } " word always outputs " { $link +modify-file+ } " and the " { $snippet "path" } " output value is always the directory containing the file that changed. Unlike other platforms, fine-grained information is not available."\r
+"The " { $snippet "changed" } " slot of the " { $link file-change } " tuple always contains " { $link +modify-file+ } " and the " { $snippet "path" } " slot is always the directory containing the file that changed. Unlike other platforms, fine-grained information is not available."\r
$nl\r
"Only directories may be monitored, not individual files. Changes to the directory itself (permissions, modification time, and so on) are not reported; only changes to children are reported."\r
{ $heading "Windows" }\r
{ $code\r
"USE: io.monitors"\r
": watch-loop ( monitor -- )"\r
- " dup next-change . . nl nl flush watch-loop ;"\r
+ " dup next-change . nl nl flush watch-loop ;"\r
""\r
": watch-directory ( path -- )"\r
" [ t [ watch-loop ] with-monitor ] with-monitors"\r
continuations namespaces concurrency.count-downs kernel io
threads calendar prettyprint destructors io.timeouts
io.files.temp io.directories io.directories.hierarchy
-io.pathnames ;
+io.pathnames accessors ;
os { winnt linux macosx } member? [
[
"b" get count-down
[
- "m" get next-change drop
+ "m" get next-change path>>
dup print flush
dup parent-directory
[ trim-right-separators "xyz" tail? ] either? not
"c1" get count-down
[
- "m" get next-change drop
+ "m" get next-change path>>
dup print flush
dup parent-directory
[ trim-right-separators "yxy" tail? ] either? not
! Non-recursive
[ ] [ "monitor-timeout-test" temp-file f <monitor> "m" set ] unit-test
[ ] [ 3 seconds "m" get set-timeout ] unit-test
- [ [ t ] [ "m" get next-change 2drop ] [ ] while ] must-fail
+ [ [ t ] [ "m" get next-change drop ] [ ] while ] must-fail
[ ] [ "m" get dispose ] unit-test
! Recursive
[ ] [ "monitor-timeout-test" temp-file t <monitor> "m" set ] unit-test
[ ] [ 3 seconds "m" get set-timeout ] unit-test
- [ [ t ] [ "m" get next-change 2drop ] [ ] while ] must-fail
+ [ [ t ] [ "m" get next-change drop ] [ ] while ] must-fail
[ ] [ "m" get dispose ] unit-test
] with-monitors
] when
-! Copyright (C) 2008 Slava Pestov.
+! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: io.backend kernel continuations destructors namespaces
sequences assocs hashtables sorting arrays threads boxes
-io.timeouts accessors concurrency.mailboxes
+io.timeouts accessors concurrency.mailboxes fry
system vocabs.loader combinators ;
IN: io.monitors
swap >>queue
swap >>path ; inline
+TUPLE: file-change path changed monitor ;
+
: queue-change ( path changes monitor -- )
3dup and and
- [ [ 3array ] keep queue>> mailbox-put ] [ 3drop ] if ;
+ [ [ file-change boa ] keep queue>> mailbox-put ] [ 3drop ] if ;
HOOK: (monitor) io-backend ( path recursive? mailbox -- monitor )
: <monitor> ( path recursive? -- monitor )
<mailbox> (monitor) ;
-: next-change ( monitor -- path changed )
- [ queue>> ] [ timeout ] bi mailbox-get-timeout first2 ;
+: next-change ( monitor -- change )
+ [ queue>> ] [ timeout ] bi mailbox-get-timeout ;
SYMBOL: +add-file+
SYMBOL: +remove-file+
: with-monitor ( path recursive? quot -- )
[ <monitor> ] dip with-disposal ; inline
+: run-monitor ( path recursive? quot -- )
+ '[ [ @ t ] loop ] with-monitor ; inline
+
+: spawn-monitor ( path recursive? quot -- )
+ [ '[ _ _ _ run-monitor ] ] [ 2drop "Monitoring " prepend ] 3bi
+ spawn drop ;
{
{ [ os macosx? ] [ "io.monitors.macosx" require ] }
{ [ os linux? ] [ "io.monitors.linux" require ] }
{ [ os winnt? ] [ "io.monitors.windows.nt" require ] }
- [ ]
+ { [ os bsd? ] [ ] }
} cond
-! Copyright (C) 2008 Slava Pestov.
+! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors sequences assocs arrays continuations
destructors combinators kernel threads concurrency.messaging
bi ;
: stop-pump ( -- )
- monitor tget children>> [ nip dispose ] assoc-each ;
+ monitor tget children>> values dispose-each ;
: pump-step ( msg -- )
- first3 path>> swap [ prepend-path ] dip monitor tget 3array
- monitor tget queue>>
- mailbox-put ;
+ [ [ monitor>> path>> ] [ path>> ] bi append-path ] [ changed>> ] bi
+ monitor tget queue-change ;
: child-added ( path monitor -- )
path>> prepend-path add-child-monitor ;
path>> prepend-path remove-child-monitor ;
: update-hierarchy ( msg -- )
- first3 swap [
+ [ path>> ] [ monitor>> ] [ changed>> ] tri [
{
{ +add-file+ [ child-added ] }
{ +remove-file+ [ child-removed ] }
}
}
{ $examples
- "Print the lines of a log file which contain the string ``error'', sort them and filter out duplicates, using Unix shell commands only:"
+ "Print the lines of a log file which contain the string “error”, sort them and filter out duplicates, using Unix shell commands only:"
{ $code "{ \"cat log.txt\" \"grep error\" \"sort\" \"uniq\" } run-pipeline" }
} ;
output-port <buffered-port> ;
: wait-to-write ( len port -- )
- tuck buffer>> buffer-capacity <=
+ [ nip ] [ buffer>> buffer-capacity <= ] 2bi
[ drop ] [ stream-flush ] if ; inline
M: output-port stream-write1
IN: io.sockets.windows.nt
: malloc-int ( object -- object )
- "int" heap-size malloc tuck 0 set-alien-signed-4 ; inline
+ "int" heap-size malloc [ nip ] [ 0 set-alien-signed-4 ] 2bi ; inline
M: winnt WSASocket-flags ( -- DWORD )
WSA_FLAG_OVERLAPPED ;
"Unlimits a limited stream:"
{ $subsection unlimit }
"Unlimits the current " { $link input-stream } ":"
-{ $subsection limit-input }
+{ $subsection unlimit-input }
"Make a limited stream throw an exception on exhaustion:"
{ $subsection stream-throws }
"Make a limited stream return " { $link f } " on exhaustion:"
USING: io io.streams.limited io.encodings io.encodings.string
io.encodings.ascii io.encodings.binary io.streams.byte-array
-namespaces tools.test strings kernel io.streams.string accessors ;
+namespaces tools.test strings kernel io.streams.string accessors
+io.encodings.utf8 io.files destructors ;
IN: io.streams.limited.tests
[ ] [
"abc" <string-reader> 3 stream-eofs limit unlimit
"abc" <string-reader> =
] unit-test
+
+[ t ]
+[
+ "abc" <string-reader> 3 stream-eofs limit unlimit
+ "abc" <string-reader> =
+] unit-test
+
+[ t ]
+[
+ [
+ "resource:license.txt" utf8 <file-reader> &dispose
+ 3 stream-eofs limit unlimit
+ "resource:license.txt" utf8 <file-reader> &dispose
+ [ decoder? ] both?
+ ] with-destructors
+] unit-test
sequences namespaces byte-vectors fry combinators ;
IN: io.streams.limited
-TUPLE: limited-stream stream count limit mode ;
+TUPLE: limited-stream stream count limit mode stack ;
SINGLETONS: stream-throws stream-eofs ;
M: object limit ( stream limit mode -- stream' )
<limited-stream> ;
-: unlimit ( stream -- stream' )
+GENERIC: unlimit ( stream -- stream' )
+
+M: decoder unlimit ( stream -- stream' )
[ stream>> ] change-stream ;
+M: object unlimit ( stream -- stream' )
+ stream>> stream>> ;
+
: limit-input ( limit mode -- ) input-stream [ -rot limit ] change ;
: unlimit-input ( -- ) input-stream [ unlimit ] change ;
+: with-unlimited-stream ( stream quot -- )
+ [ clone unlimit ] dip call ; inline
+
+: with-limited-stream ( stream limit mode quot -- )
+ [ limit ] dip call ; inline
+
ERROR: limit-exceeded ;
ERROR: bad-stream-mode mode ;
--- /dev/null
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: lcs.diff2html lcs kernel tools.test strings sequences xml.writer ;
+IN: lcs.diff2html.tests
+
+[ ] [ "hello" "heyo" [ 1string ] { } map-as diff htmlize-diff xml-chunk>string drop ] unit-test
! Copyright (C) 2008 Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
-USING: lcs html.elements kernel ;
+USING: lcs xml.interpolate xml.writer kernel strings ;
FROM: accessors => item>> ;
FROM: io => write ;
-FROM: sequences => each if-empty ;
-FROM: xml.entities => escape-string ;
+FROM: sequences => each if-empty when-empty map ;
IN: lcs.diff2html
-GENERIC: diff-line ( obj -- )
+GENERIC: diff-line ( obj -- xml )
-: write-item ( item -- )
- item>> [ " " ] [ escape-string ] if-empty write ;
+: item-string ( item -- string )
+ item>> [ CHAR: no-break-space 1string ] when-empty ;
M: retain diff-line
- <tr>
- dup [
- <td "retain" =class td>
- write-item
- </td>
- ] bi@
- </tr> ;
+ item-string
+ [XML <td class="retain"><-></td> XML]
+ dup [XML <tr><-><-></tr> XML] ;
M: insert diff-line
- <tr>
- <td> </td>
- <td "insert" =class td>
- write-item
- </td>
- </tr> ;
+ item-string [XML
+ <tr>
+ <td> </td>
+ <td class="insert"><-></td>
+ </tr>
+ XML] ;
M: delete diff-line
- <tr>
- <td "delete" =class td>
- write-item
- </td>
- <td> </td>
- </tr> ;
+ item-string [XML
+ <tr>
+ <td class="delete"><-></td>
+ <td> </td>
+ </tr>
+ XML] ;
-: htmlize-diff ( diff -- )
- <table "100%" =width "comparison" =class table>
- <tr> <th> "Old" write </th> <th> "New" write </th> </tr>
- [ diff-line ] each
- </table> ;
+: htmlize-diff ( diff -- xml )
+ [ diff-line ] map
+ [XML
+ <table width="100%" class="comparison">
+ <tr><th>Old</th><th>New</th></tr>
+ <->
+ </table>
+ XML] ;
}
"In a word with locals, literals expand into code which constructs the literal, and so every invocation pushes a new object:"
{ $example
+ "USE: locals"
"IN: scratchpad"
"TUPLE: person first-name last-name ;"
":: ordinary-word-test ( -- tuple )"
"Recall that the following two code snippets are equivalent:"
{ $code "'[ sq _ + ]" }
{ $code "[ [ sq ] dip + ] curry" }
-"The semantics of " { $link dip } " and " { $link curry } " are such that the first example behaves as if the top of the stack as ``inserted'' in the ``hole'' in the quotation's second element."
+"The semantics of " { $link dip } " and " { $link curry } " are such that the first example behaves as if the top of the stack was “inserted” in the “hole” in the quotation's second element."
$nl
"Conceptually, " { $link curry } " is defined so that the following two code snippets are equivalent:"
{ $code "3 [ - ] curry" }
{ $code "'[ [| a | a - ] curry ] call" }
"Instead, the first line above expands into something like the following:"
{ $code "[ [ swap [| a | a - ] ] curry call ]" }
-"This ensures that the fried value appears ``underneath'' the local variable " { $snippet "a" } " when the quotation calls."
+"This ensures that the fried value appears “underneath” the local variable " { $snippet "a" } " when the quotation is called."
$nl
"The precise behavior is the following. When frying a lambda, a stack shuffle (" { $link mnswap } ") is prepended to the lambda so that the " { $snippet "m" } " curried values, which start off at the top of the stack, are transposed with the " { $snippet "n" } " inputs to the lambda." ;
[ 10 ] [
[| | 0 '[ [let | A [ 10 ] | A _ + ] ] call ] call
-] unit-test
\ No newline at end of file
+] unit-test
+
+! Discovered by littledan
+[ "bar" ] [ [let | a [ [let | foo [ "bar" ] | foo ] ] | a ] ] unit-test
+[ 10 ] [ [let | a [ 10 ] | [let | b [ a ] | b ] ] ] unit-test
\ No newline at end of file
-! Copyright (C) 2007, 2008 Slava Pestov, Eduardo Cavazos.
+! Copyright (C) 2007, 2009 Slava Pestov, Eduardo Cavazos.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays combinators effects.parser
-generic.parser kernel lexer locals.errors
+generic.parser kernel lexer locals.errors fry
locals.rewrite.closures locals.types make namespaces parser
quotations sequences splitting words vocabs.parser ;
IN: locals.parser
(parse-bindings)
] [ 2drop ] if ;
+: with-bindings ( quot -- words assoc )
+ '[
+ in-lambda? on
+ _ H{ } make-assoc
+ ] { } make swap ; inline
+
: parse-bindings ( end -- bindings vars )
- [
- [ (parse-bindings) ] H{ } make-assoc
- ] { } make swap ;
+ [ (parse-bindings) ] with-bindings ;
: parse-bindings* ( end -- words assoc )
[
- [
- namespace push-locals
- (parse-bindings)
- namespace pop-locals
- ] { } make-assoc
- ] { } make swap ;
+ namespace push-locals
+ (parse-bindings)
+ namespace pop-locals
+ ] with-bindings ;
: (parse-wbindings) ( end -- )
dup parse-binding dup [
] [ 2drop ] if ;
: parse-wbindings ( end -- bindings vars )
- [
- [ (parse-wbindings) ] H{ } make-assoc
- ] { } make swap ;
+ [ (parse-wbindings) ] with-bindings ;
: parse-locals ( -- vars assoc )
"(" expect ")" parse-effect
: parse-locals-definition ( word -- word quot )
parse-locals \ ; (parse-lambda) <lambda>
- 2dup "lambda" set-word-prop
- rewrite-closures dup length 1 = [ first ] [ bad-lambda-rewrite ] if ;
+ [ "lambda" set-word-prop ]
+ [ rewrite-closures dup length 1 = [ first ] [ bad-lambda-rewrite ] if ] 2bi ;
: (::) ( -- word def ) CREATE-WORD parse-locals-definition ;
(match-first) drop ;
: (match-all) ( seq pattern-seq -- )
- tuck (match-first) swap
+ [ nip ] [ (match-first) swap ] 2bi
[
, [ swap (match-all) ] [ drop ] if*
] [ 2drop ] if* ;
"Constructing a complex number with an imaginary component equal to an integer zero simply returns the real number corresponding to the real component:"
{ $example "USING: math prettyprint ;" "C{ 1 2 } C{ 3 -2 } + ." "4" }
"Constructing a complex number with an imaginary component equal to floating point zero will still output a new complex number, however:"
-{ $example "USING: math prettyprint ;" "C{ 0.0 2.0 } C{ 0.0 1.0 } * ." "C{ 2.0 0.0 }" }
+{ $example "USING: math prettyprint ;" "C{ 0.0 2.0 } C{ 0.0 1.0 } * ." "C{ -2.0 0.0 }" }
"Unlike math, where all real numbers are also complex numbers, Factor only considers a number to be a complex number if its imaginary part is non-zero. However, complex number operations are fully supported for real numbers; they are treated as having an imaginary part of zero." ;
ARTICLE: "complex-numbers" "Complex numbers"
[ * ] 2keep gcd nip /i ; foldable
: mod-inv ( x n -- y )
- tuck gcd 1 = [
- dup 0 < [ + ] [ nip ] if
- ] [
- "Non-trivial divisor found" throw
- ] if ; foldable
+ [ nip ] [ gcd 1 = ] 2bi
+ [ dup 0 < [ + ] [ nip ] if ]
+ [ "Non-trivial divisor found" throw ] if ; foldable
: ^mod ( x y n -- z )
over 0 < [
"The words in the " { $vocab-link "math.libm" } " vocabulary call C standard library math functions. They are used to implement words in the " { $vocab-link "math.functions" } " vocabulary."
$nl
"They can be called directly, however there is little reason to do so, since they only implement real-valued functions, and in some cases place restrictions on the domain:"
-{ $example "2 acos ." "C{ 0.0 1.316957896924817 }" }
-{ $example "2 facos ." "0.0/0.0" }
+{ $example "USE: math.functions" "2 acos ." "C{ 0.0 1.316957896924817 }" }
+{ $example "USE: math.libm" "2 facos ." "0.0/0.0" }
"Trigonometric functions:"
{ $subsection fcos }
{ $subsection fsin }
dup V{ 0 } clone p= [
drop nip
] [
- tuck p/mod [ pick p* swap [ swapd p- ] dip ] dip (pgcd)
+ [ nip ] [ p/mod ] 2bi
+ [ pick p* swap [ swapd p- ] dip ] dip (pgcd)
] if ;
PRIVATE>
"Division by zero" throw
] [
dup 0 < [ [ neg ] bi@ ] when
- 2dup gcd nip tuck /i [ /i ] dip fraction>
+ 2dup gcd nip tuck [ /i ] 2bi@ fraction>
] if ;
M: ratio hashcode*
! See http://factorcode.org/license.txt for BSD license.
USING: io.encodings.ascii io.files io.files.unique kernel
mime.multipart tools.test io.streams.duplex io multiline
-assocs ;
+assocs accessors ;
IN: mime.multipart.tests
: upload-separator ( -- seq )
[ t ] [
mime-test-stream [ upload-separator parse-multipart ] with-input-stream
- nip "\"up.txt\"" swap key?
+ "file1" swap key?
] unit-test
[ t ] [
mime-test-stream [ upload-separator parse-multipart ] with-input-stream
- drop "\"text1\"" swap key?
+ "file1" swap key?
+] unit-test
+
+[ t ] [
+ mime-test-stream [ upload-separator parse-multipart ] with-input-stream
+ "file1" swap at filename>> "up.txt" =
] unit-test
USING: multiline kernel sequences io splitting fry namespaces
http.parsers hashtables assocs combinators ascii io.files.unique
accessors io.encodings.binary io.files byte-arrays math
-io.streams.string combinators.short-circuit strings ;
+io.streams.string combinators.short-circuit strings math.order ;
IN: mime.multipart
CONSTANT: buffer-size 65536
content-disposition bytes
filename temp-file
name name-content
-uploaded-files
-form-variables ;
+mime-parts ;
TUPLE: mime-file headers filename temporary-path ;
TUPLE: mime-variable headers key value ;
: <multipart> ( mime-separator -- multipart )
multipart new
swap >>mime-separator
- H{ } clone >>uploaded-files
- H{ } clone >>form-variables ;
+ H{ } clone >>mime-parts ;
ERROR: bad-header bytes ;
dup bytes>> [ fill-bytes ] unless ;
: split-bytes ( bytes separator -- leftover-bytes safe-to-dump )
- 2dup [ length ] [ length 1- ] bi* < [
- drop f
- ] [
- length 1- cut-slice swap
- ] if ;
+ dupd [ length ] bi@ 1- - short cut-slice swap ;
: dump-until-separator ( multipart -- multipart )
- dup [ current-separator>> ] [ bytes>> ] bi tuck start [
+ dup
+ [ current-separator>> ] [ bytes>> ] bi
+ [ nip ] [ start ] 2bi [
cut-slice
[ mime-write ]
- [ over current-separator>> length tail-slice >>bytes ] bi*
+ [ over current-separator>> length short tail-slice >>bytes ] bi*
] [
drop
- dup [ bytes>> ] [ current-separator>> ] bi split-bytes
- [ mime-write ] when*
+ dup [ bytes>> ] [ current-separator>> ] bi split-bytes mime-write
>>bytes fill-bytes dup end-of-stream?>> [ dump-until-separator ] unless
] if* ;
[ dump-until-separator ] with-string-writer ;
: read-header ( multipart -- multipart )
- "\r\n\r\n" dump-string dup "--\r" = [
- drop
+ dup bytes>> "--\r\n" sequence= [
+ t >>end-of-stream?
] [
- parse-headers >>header
+ "\r\n\r\n" dump-string parse-headers >>header
] if ;
: empty-name? ( string -- ? )
{ "''" "\"\"" "" f } member? ;
+: quote? ( ch -- ? ) "'\"" member? ;
+
+: quoted? ( str -- ? )
+ {
+ [ length 1 > ]
+ [ first quote? ]
+ [ [ first ] [ peek ] bi = ]
+ } 1&& ;
+
+: unquote ( str -- newstr )
+ dup quoted? [ but-last-slice rest-slice >string ] when ;
+
: save-uploaded-file ( multipart -- )
dup filename>> empty-name? [
drop
] [
[ [ header>> ] [ filename>> ] [ temp-file>> ] tri mime-file boa ]
- [ filename>> ]
- [ uploaded-files>> set-at ] tri
+ [ content-disposition>> "name" swap at unquote ]
+ [ mime-parts>> set-at ] tri
] if ;
-: save-form-variable ( multipart -- )
+: save-mime-part ( multipart -- )
dup name>> empty-name? [
drop
] [
- [ [ header>> ] [ name>> ] [ name-content>> ] tri mime-variable boa ]
- [ name>> ]
- [ form-variables>> set-at ] tri
+ [ [ header>> ] [ name>> unquote ] [ name-content>> ] tri mime-variable boa ]
+ [ name>> unquote ]
+ [ mime-parts>> set-at ] tri
] if ;
: dump-mime-file ( multipart filename -- multipart )
: parse-form-data ( multipart -- multipart )
"filename" lookup-disposition [
+ unquote
>>filename
[ dump-file ] [ save-uploaded-file ] bi
] [
"name" lookup-disposition [
[ dup mime-separator>> dump-string >>name-content ] dip
- >>name dup save-form-variable
+ >>name dup save-mime-part
] [
unknown-content-disposition
] if*
read-header
dup end-of-stream?>> [ process-header parse-multipart-loop ] unless ;
-: parse-multipart ( separator -- form-variables uploaded-files )
- <multipart> parse-beginning parse-multipart-loop
- [ form-variables>> ] [ uploaded-files>> ] bi ;
+: parse-multipart ( separator -- mime-parts )
+ <multipart> parse-beginning fill-bytes parse-multipart-loop
+ mime-parts>> ;
[ "hello\nworld" ] [ <" hello
world"> ] unit-test
+
+[ "hello" "world" ] [ <" hello"> <" world"> ] unit-test
+
+[ "\nhi" ] [ <"
+hi"> ] unit-test
! Copyright (C) 2007 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces make parser lexer kernel sequences words
-quotations math accessors ;
+quotations math accessors locals ;
IN: multiline
<PRIVATE
(( -- string )) define-inline ; parsing
<PRIVATE
-: (parse-multiline-string) ( start-index end-text -- end-index )
- lexer get line-text>> [
- 2dup start
- [ rot dupd [ swap subseq % ] 2dip length + ] [
- rot tail % "\n" % 0
- lexer get next-line swap (parse-multiline-string)
+
+:: (parse-multiline-string) ( i end -- j )
+ lexer get line-text>> :> text
+ text [
+ end text i start* [| j |
+ i j text subseq % j end length +
+ ] [
+ text i short tail % CHAR: \n ,
+ lexer get next-line
+ 0 end (parse-multiline-string)
] if*
- ] [ nip unexpected-eof ] if* ;
+ ] [ end unexpected-eof ] if ;
+
PRIVATE>
: parse-multiline-string ( end-text -- str )
[
- lexer get [ swap (parse-multiline-string) ] change-column drop
- ] "" make rest ;
+ lexer get
+ [ 1+ swap (parse-multiline-string) ]
+ change-column drop
+ ] "" make ;
: <"
"\">" parse-multiline-string parsed ; parsing
+: <'
+ "'>" parse-multiline-string parsed ; parsing
+
+: {'
+ "'}" parse-multiline-string parsed ; parsing
+
+: {"
+ "\"}" parse-multiline-string parsed ; parsing
+
: /* "*/" parse-multiline-string drop ; parsing
! See http://factorcode.org/license.txt for BSD license.
!
USING: kernel tools.test strings namespaces make arrays sequences
- peg peg.private accessors words math accessors ;
+ peg peg.private peg.parsers accessors words math accessors ;
IN: peg.tests
+[ ] [ reset-pegs ] unit-test
+
[
"endbegin" "begin" token parse
] must-fail
"B" [ drop t ] satisfy [ 66 >= ] semantic parse
] unit-test
-{ f } [ \ + T{ parser f f f } equal? ] unit-test
\ No newline at end of file
+{ f } [ \ + T{ parser f f f } equal? ] unit-test
+
+USE: compiler
+
+[ ] [ disable-compiler ] unit-test
+
+[ ] [ "" epsilon parse drop ] unit-test
+
+[ ] [ enable-compiler ] unit-test
+
+[ [ ] ] [ "" epsilon [ drop [ [ ] ] call ] action parse ] unit-test
+
+[ [ ] ] [ "" epsilon [ drop [ [ ] ] ] action [ call ] action parse ] unit-test
\ No newline at end of file
IN: persistent.hashtables.nodes.leaf
: matching-key? ( key hashcode leaf-node -- ? )
- tuck hashcode>> eq? [ key>> = ] [ 2drop f ] if ; inline
+ [ nip ] [ hashcode>> eq? ] 2bi
+ [ key>> = ] [ 2drop f ] if ; inline
M: leaf-node (entry-at) [ matching-key? ] keep and ;
{ $contract "Persistent analogue of " { $link pop* } ". Outputs a new sequence with all elements of " { $snippet "seq" } " except for the final element." } ;
ARTICLE: "persistent.sequences" "Persistent sequence protocol"
-"The persistent sequence protocol consists of the non-mutating sequence protocol words, such as " { $link length } " and " { $link nth } ", together with the following operations:"
+"The persistent sequence protocol consists of the non-mutating sequence protocol words, such as " { $link length } " and " { $link nth } ", together with the following operations:"
{ $subsection new-nth }
{ $subsection ppush }
{ $subsection ppop }
HELP: pprint-short
{ $values { "obj" object } }
-{ $description "Prettyprints an object to " { $link output-stream } ". This word rebinds printer control variables to enforce ``shorter'' output. See " { $link "prettyprint-variables" } "." } ;
+{ $description "Prettyprints an object to " { $link output-stream } ". This word rebinds printer control variables to enforce “shorter” output. See " { $link "prettyprint-variables" } "." } ;
HELP: short.
{ $values { "obj" object } }
-{ $description "Prettyprints an object to " { $link output-stream } " with a trailing line break. This word rebinds printer control variables to enforce ``shorter'' output." } ;
+{ $description "Prettyprints an object to " { $link output-stream } " with a trailing line break. This word rebinds printer control variables to enforce “shorter” output." } ;
HELP: .b
{ $values { "n" "an integer" } }
M: object declarations. drop ;
: declaration. ( word prop -- )
- tuck name>> word-prop [ pprint-word ] [ drop ] if ;
+ [ nip ] [ name>> word-prop ] 2bi
+ [ pprint-word ] [ drop ] if ;
M: word declarations.
{
ARTICLE: "random" "Generating random integers"
"The " { $vocab-link "random" } " vocabulary contains a protocol for generating random or pseudorandom numbers."
$nl
-"The ``Mersenne Twister'' pseudorandom number generator algorithm is the default generator stored in " { $link random-generator } "."
+"The “Mersenne Twister” pseudorandom number generator algorithm is the default generator stored in " { $link random-generator } "."
$nl
"Generate a random object:"
{ $subsection random }
"References to values:"
{ $subsection value-ref }
{ $subsection <value-ref> }
-"References are used by the inspector." ;
+"References are used by the UI inspector." ;
ABOUT: "refs"
dup
[ nfa-traversal-flags>> ]
[ dfa-table>> transitions>> keys ] bi
- [ tuck [ swap at ] with map concat ] with H{ } map>assoc
+ [ [ nip ] [ [ swap at ] with map concat ] 2bi ] with H{ } map>assoc
>>dfa-traversal-flags drop ;
: construct-dfa ( regexp -- )
: cut-out ( vector n -- vector' vector ) cut rest ;
ERROR: cut-stack-error ;
: cut-stack ( obj vector -- vector' vector )
- tuck last-index [ cut-stack-error ] unless* cut-out swap ;
+ [ nip ] [ last-index ] 2bi [ cut-stack-error ] unless* cut-out swap ;
: <possessive-kleene-star> ( obj -- kleene ) possessive-kleene-star boa ;
: <reluctant-kleene-star> ( obj -- kleene ) reluctant-kleene-star boa ;
[ { "1" "2" "3" "4" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
-[ { "1" "2" "3" "4" } ]
+[ { "1" "2" "3" "4" "" } ]
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
+[ { "" } ] [ "" R/ =/ re-split [ >string ] map ] unit-test
+
+[ { "a" "" } ] [ "a=" R/ =/ re-split [ >string ] map ] unit-test
+
[ { "ABC" "DEF" "GHI" } ]
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test
[ 0 ]
[ "123" R/ [A-Z]+/ count-matches ] unit-test
-[ "1.2.3.4" ]
+[ "1.2.3.4." ]
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test
+
+[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
/*
! FIXME
[ f ] [ "ab" "a(?!b)" <regexp> first-match ] unit-test
[ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
-! [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
+[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match >string ] unit-test
*/
! Bug in parsing word
-[ t ] [ "a" R' a' matches? ] unit-test
+[ t ] [ "a" R' a' matches? ] unit-test
! Convert to lowercase until E
[ f ] [ "AA" R/ \LAA\E/ matches? ] unit-test
dupd first-match
[ split1-slice swap ] [ "" like f swap ] if* ;
+: (re-split) ( string regexp -- )
+ over [ [ re-cut , ] keep (re-split) ] [ 2drop ] if ;
+
: re-split ( string regexp -- seq )
- [ dup length 0 > ] swap '[ _ re-cut ] [ ] produce nip ;
+ [ (re-split) ] { } make ;
: re-replace ( string regexp replacement -- result )
[ re-split ] dip join ;
H{ } clone >>final-states ;
: maybe-initialize-key ( key hashtable -- )
- 2dup key? [ 2drop ] [ H{ } clone -rot set-at ] if ;
+ 2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
: set-transition ( transition hash -- )
#! set the state as a key
(deserialize) (deserialize) 2dup lookup
dup [ 2nip ] [
drop
- "Unknown word: " -rot
- 2array unparse append throw
+ 2array unparse "Unknown word: " prepend throw
] if ;
: deserialize-gensym ( -- word )
}
{ $description "Splits a string on numbers and returns a sequence of sequences and integers." } ;
-HELP: human-<=>
+HELP: human<=>
{ $values
{ "obj1" object } { "obj2" object }
{ "<=>" "an ordering specifier" }
}
{ $description "Compares two objects after converting numbers in the string into integers." } ;
-HELP: human->=<
+HELP: human>=<
{ $values
{ "obj1" object } { "obj2" object }
{ ">=<" "an ordering specifier" }
}
-{ $description "Compares two objects using the " { $link human-<=> } " word and inverts the result." } ;
+{ $description "Compares two objects using the " { $link human<=> } " word and inverts the result." } ;
HELP: human-compare
{ $values
{ "seq" "an alist" }
{ "sortedseq" "a new sorted sequence" }
}
-{ $description "Sorts the elements comparing first elements of pairs using the " { $link human-<=> } " word." } ;
+{ $description "Sorts the elements comparing first elements of pairs using the " { $link human<=> } " word." } ;
HELP: human-sort-values
{ $values
{ "seq" "an alist" }
{ "sortedseq" "a new sorted sequence" }
}
-{ $description "Sorts the elements comparing second elements of pairs using the " { $link human-<=> } " word." } ;
+{ $description "Sorts the elements comparing second elements of pairs using the " { $link human<=> } " word." } ;
{ <=> >=< human-compare human-sort human-sort-keys human-sort-values } related-words
-ARTICLE: "sorting.human" "sorting.human"
+ARTICLE: "sorting.human" "Human-friendly sorting"
"The " { $vocab-link "sorting.human" } " vocabulary sorts by numbers as a human would -- by comparing their magnitudes -- rather than in a lexicographic way. For example, sorting a1, a10, a03, a2 with human sort returns a1, a2, a03, a10, while sorting with natural sort returns a03, a1, a10, a2." $nl
"Comparing two objects:"
-{ $subsection human-<=> }
-{ $subsection human->=< }
+{ $subsection human<=> }
+{ $subsection human>=< }
{ $subsection human-compare }
"Sort a sequence:"
{ $subsection human-sort }
: find-numbers ( string -- seq )
[EBNF Result = ([0-9]+ => [[ string>number ]] | (!([0-9]) .)+)* EBNF] ;
-: human-<=> ( obj1 obj2 -- <=> ) [ find-numbers ] bi@ <=> ;
+: human<=> ( obj1 obj2 -- <=> ) [ find-numbers ] bi@ <=> ;
-: human->=< ( obj1 obj2 -- >=< ) human-<=> invert-comparison ; inline
+: human>=< ( obj1 obj2 -- >=< ) human<=> invert-comparison ; inline
-: human-compare ( obj1 obj2 quot -- <=> ) bi@ human-<=> ;
+: human-compare ( obj1 obj2 quot -- <=> ) bi@ human<=> ;
-: human-sort ( seq -- seq' ) [ human-<=> ] sort ;
+: human-sort ( seq -- seq' ) [ human<=> ] sort ;
: human-sort-keys ( seq -- sortedseq )
[ [ first ] human-compare ] sort ;
T{ sort-test f 1 1 11 }
T{ sort-test f 2 5 3 }
T{ sort-test f 2 5 2 }
- } { { a>> human-<=> } { b>> human->=< } { c>> <=> } } sort-by-slots
+ } { { a>> human<=> } { b>> human>=< } { c>> <=> } } sort-by-slots
] unit-test
[
T{ sort-test f 2 5 3 }
T{ sort-test f 2 5 2 }
}
- { { a>> human-<=> } { b>> <=> } } [ sort-by-slots ] keep
+ { { a>> human<=> } { b>> <=> } } [ sort-by-slots ] keep
[ but-last-slice ] map split-by-slots [ >array ] map
] unit-test
\ dll-valid? { object } { object } define-primitive
-\ modify-code-heap { array object } { } define-primitive
+\ modify-code-heap { array } { } define-primitive
\ unimplemented { } { } define-primitive
ARTICLE: "inference-combinators" "Combinator stack effects"
"Without further information, one cannot say what the stack effect of " { $link call } " is; it depends on the given quotation. If the inferencer encounters a " { $link call } " without further information, a " { $link literal-expected } " error is raised."
-{ $example "[ dup call ] infer." "... an error ..." }
+{ $example "[ dup call ] infer." "Literal value expected\n\nType :help for debugging help." }
"On the other hand, the stack effect of applying " { $link call } " to a literal quotation or a " { $link curry } " of a literal quotation is easy to compute; it behaves as if the quotation was substituted at that point:"
{ $example "[ [ 2 + ] call ] infer." "( object -- object )" }
"Consider a combinator such as " { $link keep } ". The combinator itself does not have a stack effect, because it applies " { $link call } " to a potentially arbitrary quotation. However, since the combinator is declared " { $link POSTPONE: inline } ", a given usage of it can have a stack effect:"
"Here is an example where the stack effect cannot be inferred:"
{ $code ": foo 0 [ + ] ;" "[ foo reduce ] infer." }
"However if " { $snippet "foo" } " was declared " { $link POSTPONE: inline } ", everything would work, since the " { $link reduce } " combinator is also " { $link POSTPONE: inline } ", and the inferencer can see the literal quotation value at the point it is passed to " { $link call } ":"
-{ $example ": foo 0 [ + ] ; inline" "[ foo reduce ] infer." "( object -- object )" } ;
+{ $example ": foo 0 [ + ] ; inline" "[ foo reduce ] infer." "( object -- object )" }
+"Passing a literal quotation on the data stack through an inlined recursive combinator nullifies its literal status. For example, the following will not infer:"
+{ $example
+ "[ [ reverse ] swap [ reverse ] map swap call ] infer." "Literal value expected\n\nType :help for debugging help."
+}
+"To make this work, pass the quotation on the retain stack instead:"
+{ $example
+ "[ [ reverse ] [ [ reverse ] map ] dip call ] infer." "( object -- object )"
+} ;
ARTICLE: "inference-branches" "Branch stack effects"
"Conditionals such as " { $link if } " and combinators built on " { $link if } " present a problem, in that if the two branches leave the stack at a different height, it is not clear what the stack effect should be. In this case, inference throws a " { $link unbalanced-branches-error } "."
$nl
"If a recursive word takes quotation parameters from the stack and calls them, it must be declared " { $link POSTPONE: inline } " (as documented in " { $link "inference-combinators" } ") as well as " { $link POSTPONE: recursive } "."
$nl
-"Furthermore, the input parameters which are quotations must be annotated in the stack effect. For example,"
-{ $see loop }
-"An inline recursive word cannot pass a quotation through the recursive call. For example, the following will not infer:"
-{ $code ": foo ( a b c -- d e f ) [ f foo drop ] when 2dup call ; inline" "[ 1 [ 1+ ] foo ] infer." }
+"Furthermore, the input parameters which are quotations must be annotated in the stack effect. For example, the following will not infer:"
+{ $example ": bad ( quot -- ) [ call ] keep foo ; inline recursive" "[ [ ] bad ] infer." "Literal value expected\n\nType :help for debugging help." }
+"The following is correct:"
+{ $example ": good ( quot: ( -- ) -- ) [ call ] keep good ; inline recursive" "[ [ ] good ] infer." "( -- )" }
+"An inline recursive word cannot pass a quotation on the data stack through the recursive call. For example, the following will not infer:"
+{ $example ": bad ( ? quot: ( ? -- ) -- ) 2dup [ not ] dip bad call ; inline recursive" "[ [ drop ] bad ] infer." "Literal value expected\n\nType :help for debugging help." }
"However a small change can be made:"
-{ $example ": foo ( a b c -- d ) [ 2dup f foo drop ] when call ; inline" "[ 1 [ 1+ ] t foo ] infer." "( -- object )" }
+{ $example ": good ( ? quot: ( ? -- ) -- ) [ good ] 2keep [ not ] dip call ; inline recursive" "[ [ drop ] good ] infer." "( object -- )" }
"An inline recursive word must have a fixed stack effect in its base case. The following will not infer:"
{ $code
": foo ( quot ? -- ) [ f foo ] [ call ] if ; inline"
+++ /dev/null
-Daniel Ehrenberg
+++ /dev/null
-USING: help.markup help.syntax ;
-IN: state-parser
-
-ABOUT: { "state-parser" "main" }
-
-ARTICLE: { "state-parser" "main" } "State-based parsing"
- "This module defines a state-based parsing mechanism. It was originally created for libs/xml, but is also used in libs/csv and can be easily used in new libraries or applications."
- { $subsection spot }
- { $subsection skip-until }
- { $subsection take-until }
- { $subsection take-char }
- { $subsection take-string }
- { $subsection next }
- { $subsection state-parse }
- { $subsection get-char }
- { $subsection take-rest }
- { $subsection string-parse }
- { $subsection expect }
- { $subsection expect-string }
- { $subsection parsing-error } ;
-
-HELP: get-char
-{ $values { "char" "the current character" } }
-{ $description "Accesses the current character of the stream that is being parsed" } ;
-
-HELP: take-rest
-{ $values { "string" "the rest of the parser input" } }
-{ $description "Exausts the stream of the parser input and returns a string representing the rest of the input" } ;
-
-HELP: string-parse
-{ $values { "input" "a string" } { "quot" "a quotation ( -- )" } }
-{ $description "Calls the given quotation using the given string as parser input" }
-{ $see-also state-parse } ;
-
-HELP: expect
-{ $values { "ch" "a number representing a character" } }
-{ $description "Asserts that the current character is the given ch, and moves to the next spot" }
-{ $see-also expect-string } ;
-
-HELP: expect-string
-{ $values { "string" "a string" } }
-{ $description "Asserts that the current parsing spot is followed by the given string, and skips the parser past that string" }
-{ $see-also expect } ;
-
-HELP: spot
-{ $var-description "This variable represents the location in the program. It is a tuple T{ spot f char column line next } where char is the current character, line is the line number, column is the column number, and line-str is the full contents of the line, as a string. The contents shouldn't be accessed directly but rather with the proxy words get-char set-char get-line etc." } ;
-
-HELP: skip-until
-{ $values { "quot" "a quotation ( -- ? )" } }
-{ $description "executes " { $link next } " until the quotation yields false. Usually, the quotation will call " { $link get-char } " in its test, but not always." }
-{ $see-also take-until } ;
-
-HELP: take-until
-{ $values { "quot" "a quotation ( -- ? )" } { "string" "a string" } }
-{ $description "like " { $link skip-until } " but records what it passes over and outputs the string." }
-{ $see-also skip-until take-char take-string } ;
-
-HELP: take-char
-{ $values { "ch" "a character" } { "string" "a string" } }
-{ $description "records the document from the current spot to the first instance of the given character. Outputs the content between those two points." }
-{ $see-also take-until take-string } ;
-
-HELP: take-string
-{ $values { "match" "a string to match" } { "string" "the portion of the XML document" } }
-{ $description "records the document from the current spot to the first instance of the given character. Outputs the content between those two points." }
-{ $notes "match may not contain a newline" } ;
-
-HELP: next
-{ $description "originally written as " { $code "spot inc" } ", code that would no longer run, this word moves the state of the XML parser to the next place in the source file, keeping track of appropriate debugging information." } ;
-
-HELP: parsing-error
-{ $class-description "class from which parsing errors inherit, containing information about which line and column the error occured on, and what the line was. Contains three slots, line, an integer, column, another integer, and line-str, a string" } ;
+++ /dev/null
-USING: tools.test state-parser kernel io strings ascii ;
-
-[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
-[ 2 4 ] [ "12\n123" [ take-rest drop get-line get-column ] string-parse ] unit-test
-[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test
-[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test
-[ "foo " " bar" ] [ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test
-[ "baz" ] [ " \n\t baz" [ pass-blank take-rest ] string-parse ] unit-test
+++ /dev/null
-! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
-! See http://factorcode.org/license.txt for BSD license.\r
-USING: io io.streams.string kernel math namespaces sequences\r
-strings circular prettyprint debugger ascii sbufs fry summary\r
-accessors ;\r
-IN: state-parser\r
-\r
-! * Basic underlying words\r
-! Code stored in stdio\r
-! Spot is composite so it won't be lost in sub-scopes\r
-TUPLE: spot char line column next ;\r
-\r
-C: <spot> spot\r
-\r
-: get-char ( -- char ) spot get char>> ;\r
-: set-char ( char -- ) spot get swap >>char drop ;\r
-: get-line ( -- line ) spot get line>> ;\r
-: set-line ( line -- ) spot get swap >>line drop ;\r
-: get-column ( -- column ) spot get column>> ;\r
-: set-column ( column -- ) spot get swap >>column drop ;\r
-: get-next ( -- char ) spot get next>> ;\r
-: set-next ( char -- ) spot get swap >>next drop ;\r
-\r
-! * Errors\r
-TUPLE: parsing-error line column ;\r
-\r
-: parsing-error ( class -- obj )\r
- new\r
- get-line >>line\r
- get-column >>column ;\r
-M: parsing-error summary ( obj -- str )\r
- [\r
- "Parsing error" print\r
- "Line: " write dup line>> .\r
- "Column: " write column>> .\r
- ] with-string-writer ;\r
-\r
-TUPLE: expected < parsing-error should-be was ;\r
-: expected ( should-be was -- * )\r
- \ expected parsing-error\r
- swap >>was\r
- swap >>should-be throw ;\r
-M: expected summary ( obj -- str )\r
- [\r
- dup call-next-method write\r
- "Token expected: " write dup should-be>> print\r
- "Token present: " write was>> print\r
- ] with-string-writer ;\r
-\r
-TUPLE: unexpected-end < parsing-error ;\r
-: unexpected-end ( -- * ) \ unexpected-end parsing-error throw ;\r
-M: unexpected-end summary ( obj -- str )\r
- [\r
- call-next-method write\r
- "File unexpectedly ended." print\r
- ] with-string-writer ;\r
-\r
-TUPLE: missing-close < parsing-error ;\r
-: missing-close ( -- * ) \ missing-close parsing-error throw ;\r
-M: missing-close summary ( obj -- str )\r
- [\r
- call-next-method write\r
- "Missing closing token." print\r
- ] with-string-writer ;\r
-\r
-SYMBOL: prolog-data\r
-\r
-! * Basic utility words\r
-\r
-: record ( char -- )\r
- CHAR: \n =\r
- [ 0 get-line 1+ set-line ] [ get-column 1+ ] if\r
- set-column ;\r
-\r
-! (next) normalizes \r\n and \r\r
-: (next) ( -- char )\r
- get-next read1\r
- 2dup swap CHAR: \r = [\r
- CHAR: \n =\r
- [ nip read1 ] [ nip CHAR: \n swap ] if\r
- ] [ drop ] if\r
- set-next dup set-char ;\r
-\r
-: next ( -- )\r
- #! Increment spot.\r
- get-char [ unexpected-end ] unless (next) record ;\r
-\r
-: next* ( -- )\r
- get-char [ (next) record ] when ;\r
-\r
-: skip-until ( quot: ( -- ? ) -- )\r
- get-char [\r
- [ call ] keep swap [ drop ] [\r
- next skip-until\r
- ] if\r
- ] [ drop ] if ; inline recursive\r
-\r
-: take-until ( quot -- string )\r
- #! Take the substring of a string starting at spot\r
- #! from code until the quotation given is true and\r
- #! advance spot to after the substring.\r
- 10 <sbuf> [\r
- '[ @ [ t ] [ get-char _ push f ] if ] skip-until\r
- ] keep >string ; inline\r
-\r
-: take-rest ( -- string )\r
- [ f ] take-until ;\r
-\r
-: take-char ( ch -- string )\r
- [ dup get-char = ] take-until nip ;\r
-\r
-TUPLE: not-enough-characters < parsing-error ;\r
-: not-enough-characters ( -- * )\r
- \ not-enough-characters parsing-error throw ;\r
-M: not-enough-characters summary ( obj -- str )\r
- [\r
- call-next-method write\r
- "Not enough characters" print\r
- ] with-string-writer ;\r
-\r
-: take ( n -- string )\r
- [ 1- ] [ <sbuf> ] bi [\r
- '[ drop get-char [ next _ push f ] [ t ] if* ] contains? drop\r
- ] keep get-char [ over push ] when* >string ;\r
-\r
-: pass-blank ( -- )\r
- #! Advance code past any whitespace, including newlines\r
- [ get-char blank? not ] skip-until ;\r
-\r
-: string-matches? ( string circular -- ? )\r
- get-char over push-circular\r
- sequence= ;\r
-\r
-: take-string ( match -- string )\r
- dup length <circular-string>\r
- [ 2dup string-matches? ] take-until nip\r
- dup length rot length 1- - head\r
- get-char [ missing-close ] unless next ;\r
-\r
-: expect ( ch -- )\r
- get-char 2dup = [ 2drop ] [\r
- [ 1string ] bi@ expected\r
- ] if next ;\r
-\r
-: expect-string ( string -- )\r
- dup [ get-char next ] replicate 2dup =\r
- [ 2drop ] [ expected ] if ;\r
-\r
-: init-parser ( -- )\r
- 0 1 0 f <spot> spot set\r
- read1 set-next next ;\r
-\r
-: state-parse ( stream quot -- )\r
- ! with-input-stream implicitly creates a new scope which we use\r
- swap [ init-parser call ] with-input-stream ; inline\r
-\r
-: string-parse ( input quot -- )\r
- [ <string-reader> ] dip state-parse ; inline\r
+++ /dev/null
-State-machined based text parsing framework
USING: syndication io kernel io.files tools.test io.encodings.utf8
-calendar urls ;
+calendar urls xml.writer ;
IN: syndication.tests
\ download-feed must-infer
}
}
} ] [ "resource:basis/syndication/test/atom.xml" load-news-file ] unit-test
+[ ] [ "resource:basis/syndication/test/atom.xml" load-news-file feed>xml xml>string drop ] unit-test
! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
! Portions copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: xml.utilities kernel assocs xml.generator math.order
+USING: xml.utilities kernel assocs math.order
strings sequences xml.data xml.writer
io.streams.string combinators xml xml.entities.html io.files io
- http.client namespaces make xml.generator hashtables
+ http.client namespaces make xml.interpolate hashtables
calendar.format accessors continuations urls present ;
IN: syndication
: any-tag-named ( tag names -- tag-inside )
- f -rot [ tag-named nip dup ] with find 2drop ;
+ [ f ] 2dip [ tag-named nip dup ] with find 2drop ;
TUPLE: feed title url entries ;
http-get nip string>feed ;
! Atom generation
-: simple-tag, ( content name -- )
- [ , ] tag, ;
-
-: simple-tag*, ( content name attrs -- )
- [ , ] tag*, ;
-
-: entry, ( entry -- )
- "entry" [
- {
- [ title>> "title" { { "type" "html" } } simple-tag*, ]
- [ url>> present "href" associate "link" swap contained*, ]
- [ date>> timestamp>rfc3339 "published" simple-tag, ]
- [ description>> [ "content" { { "type" "html" } } simple-tag*, ] when* ]
- } cleave
- ] tag, ;
+
+: entry>xml ( entry -- xml )
+ {
+ [ title>> ]
+ [ url>> present ]
+ [ date>> timestamp>rfc3339 ]
+ [ description>> ]
+ } cleave
+ [XML
+ <entry>
+ <title type="html"><-></title>
+ <link href=<-> />
+ <published><-></published>
+ <content type="html"><-></content>
+ </entry>
+ XML] ;
: feed>xml ( feed -- xml )
- "feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [
- [ title>> "title" simple-tag, ]
- [ url>> present "href" associate "link" swap contained*, ]
- [ entries>> [ entry, ] each ]
- tri
- ] make-xml* ;
+ [ title>> ]
+ [ url>> present ]
+ [ entries>> [ entry>xml ] map ] tri
+ <XML
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title><-></title>
+ <link href=<-> />
+ <->
+ </feed>
+ XML> ;
USING: help.markup help.syntax threads ;
HELP: threads.
-{ $description "Prints a list of running threads and their state. The ``Waiting on'' column displays one of the following:"
+{ $description "Prints a list of running threads and their state. The “Waiting on” column displays one of the following:"
{ $list
- "``running'' if the thread is the current thread"
- "``yield'' if the thread is waiting to run"
+ "“running” if the thread is the current thread"
+ "“yield” if the thread is waiting to run"
{ "the string given to " { $link suspend } " if the thread is suspended" }
}
} ;
-! Copyright (C) 2008 Slava Pestov.\r
+! Copyright (C) 2008, 2009 Slava Pestov.\r
! See http://factorcode.org/license.txt for BSD license.\r
USING: threads io.files io.pathnames io.monitors init kernel\r
vocabs vocabs.loader tools.vocabs namespaces continuations\r
sequences splitting assocs command-line concurrency.messaging\r
-io.backend sets tr ;\r
+io.backend sets tr accessors ;\r
IN: tools.vocabs.monitor\r
\r
TR: convert-separators "/\\" ".." ;\r
: monitor-loop ( -- )\r
#! On OS X, monitors give us the full path, so we chop it\r
#! off if its there.\r
- receive first path>vocab changed-vocab\r
+ receive path>> path>vocab changed-vocab\r
reset-cache\r
monitor-loop ;\r
\r
dupd editor-select-next mark>caret ;
: editor-select ( from to editor -- )
- tuck caret>> set-model mark>> set-model ;
+ tuck [ mark>> set-model ] [ caret>> set-model ] 2bi* ;
: select-elt ( editor elt -- )
[ [ [ editor-caret* ] [ model>> ] bi ] dip prev/next-elt ] [ drop ] 2bi
in-layout? get [ invalidate ] [ invalidate* ] if ;
M: gadget (>>dim) ( dim gadget -- )
- 2dup dim>> = [ 2drop ] [ tuck call-next-method dim-changed ] if ;
+ 2dup dim>> =
+ [ 2drop ]
+ [ [ nip ] [ call-next-method ] 2bi dim-changed ] if ;
GENERIC: pref-dim* ( gadget -- dim )
f >>parent drop ;
: unfocus-gadget ( child gadget -- )
- tuck focus>> eq? [ f >>focus ] when drop ;
+ [ nip ] [ focus>> eq? ] 2bi [ f >>focus ] when drop ;
SYMBOL: in-layout?
dup unparent
over >>parent
tuck ((add-gadget))
- tuck graft-state>> second
- [ graft ]
- [ drop ]
- if ;
+ tuck graft-state>> second [ graft ] [ drop ] if ;
: add-gadget ( parent child -- parent )
not-in-layout
: (screen-rect) ( gadget -- loc ext )
dup parent>> [
[ rect-extent ] dip (screen-rect)
- [ tuck v+ ] dip vmin [ v+ ] dip
+ [ [ nip ] [ v+ ] 2bi ] dip [ v+ ] [ vmin ] 2bi*
] [
rect-extent
] if* ;
] keep orientation>> set-axis ;
: update-cursor ( gadget incremental -- )
- tuck next-cursor >>cursor drop ;
+ [ nip ] [ next-cursor ] 2bi >>cursor drop ;
: incremental-loc ( gadget incremental -- )
[ cursor>> ] [ orientation>> ] bi v*
{ $operations "kernel" vocab } ;
ARTICLE: "ui-completion-sources" "Source file completion popup"
-"The source file completion popup lists all source files which have been previously loaded by " { $link run-file } ". Clicking a source file or pressing " { $snippet "RET" } " opens the source file in your editor with " { $link edit } "."
+"The source file completion popup lists all source files which have been previously loaded by " { $link run-file } ". Clicking a source file or pressing " { $snippet "RET" } " opens the source file in your editor with " { $link edit } "."
{ $operations P" " } ;
ARTICLE: "ui-completion" "UI completion popups"
{ $subsection add-gadgets }
{ $subsection clear-gadget }
"The children of a gadget are available via the "
-{ $snippet "children" } " slot. " "Working with gadget children:"
+{ $snippet "children" } " slot. "
+$nl
+"Working with gadget children:"
{ $subsection gadget-child }
{ $subsection nth-gadget }
{ $subsection each-child }
{ $subsection relayout-1 }
"Gadgets implement a generic word to inform their parents of their preferred size:"
{ $subsection pref-dim* }
-"To get a gadget's preferred size, do not call the above word, instead use " { $link pref-dim } ", which caches the result." ;
+"To get a gadget's preferred size, do not call the above word, instead use " { $link pref-dim } ", which caches the result." ;
ARTICLE: "ui-null-layout" "Manual layouts"
"When automatic layout is not appropriate, gadgets can be added to a parent with no layout policy, and then positioned and sized manually by setting the " { $snippet "loc" } " field." ;
: first-grapheme ( str -- i )
unclip-slice grapheme-class over
- [ grapheme-class tuck grapheme-break? ] find drop
+ [ grapheme-class [ nip ] [ grapheme-break? ] 2bi ] find drop
nip swap length or 1+ ;
<PRIVATE
-! Copyright (C) 2009 Your name.
+! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax kernel ;
IN: unicode.categories
HELP: LETTER
-{ $class-description "The class of upper cased letters" } ;
+{ $class-description "The class of upper cased letters." } ;
HELP: Letter
-{ $class-description "The class of letters" } ;
+{ $class-description "The class of letters." } ;
HELP: alpha
-{ $class-description "The class of code points which are alphanumeric" } ;
+{ $class-description "The class of alphanumeric characters." } ;
HELP: blank
-{ $class-description "The class of code points which are whitespace" } ;
+{ $class-description "The class of whitespace characters." } ;
HELP: character
-{ $class-description "The class of numbers which are pre-defined Unicode code points" } ;
+{ $class-description "The class of pre-defined Unicode code points." } ;
HELP: control
-{ $class-description "The class of control characters" } ;
+{ $class-description "The class of control characters." } ;
HELP: digit
-{ $class-description "The class of code coints which are digits" } ;
+{ $class-description "The class of digits." } ;
HELP: letter
-{ $class-description "The class of code points which are lower-cased letters" } ;
+{ $class-description "The class of lower-cased letters." } ;
HELP: printable
-{ $class-description "The class of characters which are printable, as opposed to being control or formatting characters" } ;
+{ $class-description "The class of characters which are printable, as opposed to being control or formatting characters." } ;
HELP: uncased
-{ $class-description "The class of letters which don't have a case" } ;
+{ $class-description "The class of letters which don't have a case." } ;
ARTICLE: "unicode.categories" "Character classes"
-{ $vocab-link "unicode.categories" } " is a vocabulary which provides predicates for determining if a code point has a particular property, for example being a lower cased letter. These should be used in preference to the " { $vocab-link "ascii" } " equivalents in most cases. Below are links to classes of characters, but note that each of these also has a predicate defined, which is usually more useful."
+"The " { $vocab-link "unicode.categories" } " vocabulary implements predicates for determining if a code point has a particular property, for example being a lower cased letter. These should be used in preference to the " { $vocab-link "ascii" } " equivalents in most cases. Each character class has an associated predicate word."
{ $subsection blank }
+{ $subsection blank? }
{ $subsection letter }
+{ $subsection letter? }
{ $subsection LETTER }
+{ $subsection LETTER? }
{ $subsection Letter }
+{ $subsection Letter? }
{ $subsection digit }
+{ $subsection digit? }
{ $subsection printable }
+{ $subsection printable? }
{ $subsection alpha }
+{ $subsection alpha? }
{ $subsection control }
+{ $subsection control? }
{ $subsection uncased }
-{ $subsection character } ;
+{ $subsection uncased? }
+{ $subsection character }
+{ $subsection character? } ;
ABOUT: "unicode.categories"
\r
: filter-ignorable ( weights -- weights' )\r
f swap [\r
- tuck primary>> zero? and\r
+ [ nip ] [ primary>> zero? and ] 2bi\r
[ swap ignorable?>> or ]\r
[ swap completely-ignorable? or not ] 2bi\r
] filter nip ;\r
ABOUT: "unicode.normalize"
ARTICLE: "unicode.normalize" "Unicode normalization"
-"The " { $vocab-link "unicode.normalize" "unicode.normalize" } " vocabulary defines words for normalizing Unicode strings. In Unicode, it is often possible to have multiple sequences of characters which really represent exactly the same thing. For example, to represent e with an acute accent above, there are two possible strings: \"e\\u000301\" (the e character, followed by the combining acute accent character) and \"\\u0000e9\" (a single character, e with an acute accent). There are four normalization forms: NFD, NFC, NFKD, and NFKC. Basically, in NFD and NFKD, everything is expanded, whereas in NFC and NFKC, everything is contracted. In NFKD and NFKC, more things are expanded and contracted. This is a process which loses some information, so it should be done only with care. Most of the world uses NFC to communicate, but for many purposes, NFD/NFKD is easier to process. For more information, see Unicode Standard Annex #15 and section 3 of the Unicode standard."
+"The " { $vocab-link "unicode.normalize" "unicode.normalize" } " vocabulary defines words for normalizing Unicode strings."
+$nl
+"In Unicode, it is often possible to have multiple sequences of characters which really represent exactly the same thing. For example, to represent e with an acute accent above, there are two possible strings: " { $snippet "\"e\\u000301\"" } " (the e character, followed by the combining acute accent character) and " { $snippet "\"\\u0000e9\"" } " (a single character, e with an acute accent)."
+$nl
+"There are four normalization forms: NFD, NFC, NFKD, and NFKC. Basically, in NFD and NFKD, everything is expanded, whereas in NFC and NFKC, everything is contracted. In NFKD and NFKC, more things are expanded and contracted. This is a process which loses some information, so it should be done only with care."
+$nl
+"Most of the world uses NFC to communicate, but for many purposes, NFD/NFKD is easier to process. For more information, see Unicode Standard Annex #15 and section 3 of the Unicode standard."
{ $subsection nfc }
{ $subsection nfd }
{ $subsection nfkc }
HELP: nfc
{ $values { "string" string } { "nfc" "a string in NFC" } }
-{ $description "Converts a string to Normalization Form C" } ;
+{ $description "Converts a string to Normalization Form C." } ;
HELP: nfd
{ $values { "string" string } { "nfd" "a string in NFD" } }
-{ $description "Converts a string to Normalization Form D" } ;
+{ $description "Converts a string to Normalization Form D." } ;
HELP: nfkc
{ $values { "string" string } { "nfkc" "a string in NFKC" } }
-{ $description "Converts a string to Normalization Form KC" } ;
+{ $description "Converts a string to Normalization Form KC." } ;
HELP: nfkd
{ $values { "string" string } { "nfkd" "a string in NFKD" } }
-{ $description "Converts a string to Normalization Form KD" } ;
+{ $description "Converts a string to Normalization Form KD." } ;
-USING: help.markup help.syntax ;
+USING: help.markup help.syntax strings ;
IN: unicode
ARTICLE: "unicode" "Unicode"
-"Unicode is a set of characters, or " { $emphasis "code points" } " covering what's used in most world writing systems. Any Factor string can hold any of these code points transparently; a factor string is a sequence of Unicode code points. Unicode is accompanied by several standard algorithms for common operations like encoding in files, capitalizing a string, finding the boundaries between words, etc. When a programmer is faced with a string manipulation problem, where the string represents human language, a Unicode algorithm is often much better than the naive one. This is not in terms of efficiency, but rather internationalization. Even English text that remains in ASCII is better served by the Unicode collation algorithm than a naive algorithm. The Unicode algorithms implemented here are:"
+"The " { $vocab-link "unicode" } " vocabulary and its sub-vocabularies implement support for the Unicode 5.1 character set."
+$nl
+"The Unicode character set contains most of the world's writing systems. Unicode is intended as a replacement for, and is a superset of, such legacy character sets as ASCII, Latin1, MacRoman, and so on. Unicode characters are called " { $emphasis "code points" } "; Factor's " { $link "strings" } " are sequences of code points."
+$nl
+"The Unicode character set is accompanied by several standard algorithms for common operations like encoding text in files, capitalizing a string, finding the boundaries between words, and so on."
+$nl
+"The Unicode algorithms implemented by the " { $vocab-link "unicode" } " vocabulary are:"
{ $vocab-subsection "Case mapping" "unicode.case" }
{ $vocab-subsection "Collation and weak comparison" "unicode.collation" }
{ $vocab-subsection "Character classes" "unicode.categories" }
"The following are mostly for internal use:"
{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
{ $vocab-subsection "Unicode data tables" "unicode.data" }
-{ $see-also "io.encodings" } ;
+{ $see-also "ascii" "io.encodings" } ;
ABOUT: "unicode"
dup message>> write " (" write errno>> pprint ")" print ;
M: unix-system-call-error error.
- "Unix system call ``" write dup word>> pprint "'' failed:" print
+ "Unix system call “" write dup word>> pprint "” failed:" print
nl
dup message>> write " (" write dup errno>> pprint ")" print
nl
$nl
"Listing all groups:"
{ $subsection all-groups }
-"Returning a passwd tuple:"
"Real groups:"
{ $subsection real-group-name }
{ $subsection real-group-id }
USING: kernel alien.c-types alien.strings sequences math alien.syntax unix
vectors kernel namespaces continuations threads assocs vectors
-io.backend.unix io.encodings.utf8 unix.utilities ;
+io.backend.unix io.encodings.utf8 unix.utilities fry ;
IN: unix.process
! Low-level Unix process launching utilities. These are used
[ [ first ] [ ] bi ] dip exec-with-env ;
: with-fork ( child parent -- )
- [ [ fork-process dup zero? ] dip [ drop ] prepose ] dip
+ [ [ fork-process dup zero? ] dip '[ drop @ ] ] dip
if ; inline
CONSTANT: SIGKILL 9
: change-file-times ( filename access modification -- )
"utimebuf" <c-object>
- tuck set-utimbuf-modtime
- tuck set-utimbuf-actime
+ [ set-utimbuf-modtime ] keep
+ [ set-utimbuf-actime ] keep
[ utime ] unix-system-call drop ;
FUNCTION: int pclose ( void* file ) ;
$nl
"Listing all users:"
{ $subsection all-users }
-"Returning a passwd tuple:"
"Real user:"
{ $subsection real-user-name }
{ $subsection real-user-id }
v-regexp ;
: v-url ( str -- str )
- "URL" R' (ftp|http|https)://\S+' v-regexp ;
+ "URL" R' (?:ftp|http|https)://\S+' v-regexp ;
: v-captcha ( str -- str )
dup empty? [ "must remain blank" throw ] unless ;
] if ;
: own-selection ( prop win -- )
- dpy get -rot CurrentTime XSetSelectionOwner drop
+ [ dpy get ] 2dip CurrentTime XSetSelectionOwner drop
flush-dpy ;
: set-targets-prop ( evt -- )
: set-size-hints ( window -- )
"XSizeHints" <c-object>
USPosition over set-XSizeHints-flags
- dpy get -rot XSetWMNormalHints ;
+ [ dpy get ] 2dip XSetWMNormalHints ;
: auto-position ( window loc -- )
{ 0 0 } = [ drop ] [ set-size-hints ] if ;
USING: accessors kernel xml arrays math generic http.client
combinators hashtables namespaces io base64 sequences strings
calendar xml.data xml.writer xml.utilities assocs math.parser
-debugger calendar.format math.order ;
+debugger calendar.format math.order xml.interpolate xml.dispatch ;
IN: xml-rpc
! * Sending RPC requests
M: integer item>xml
dup 31 2^ neg 31 2^ 1 - between?
[ "Integers must fit in 32 bits" throw ] unless
- number>string "i4" build-tag ;
+ [XML <i4><-></i4> XML] ;
UNION: boolean t POSTPONE: f ;
M: boolean item>xml
- "1" "0" ? "boolean" build-tag ;
+ "1" "0" ? [XML <boolean><-></boolean> XML] ;
M: float item>xml
- number>string "double" build-tag ;
+ number>string [XML <double><-></double> XML] ;
-M: string item>xml ! This should change < and &
- "string" build-tag ;
+M: string item>xml
+ [XML <string><-></string> XML] ;
: struct-member ( name value -- tag )
- swap dup string?
- [ "Struct member name must be string" throw ] unless
- "name" build-tag swap
- item>xml "value" build-tag
- 2array "member" build-tag* ;
+ over string? [ "Struct member name must be string" throw ] unless
+ item>xml
+ [XML
+ <member>
+ <name><-></name>
+ <value><-></value>
+ </member>
+ XML] ;
M: hashtable item>xml
[ struct-member ] { } assoc>map
- "struct" build-tag* ;
+ [XML <struct><-></struct> XML] ;
M: array item>xml
- [ item>xml "value" build-tag ] map
- "data" build-tag* "array" build-tag ;
+ [ item>xml [XML <value><-></value> XML] ] map
+ [XML <array><data><-></data></array> XML] ;
TUPLE: base64 string ;
C: <base64> base64
M: base64 item>xml
- string>> >base64 "base64" build-tag ;
+ string>> >base64
+ [XML <base64><-></base64> XML] ;
: params ( seq -- xml )
- [ item>xml "value" build-tag "param" build-tag ] map
- "params" build-tag* ;
+ [ item>xml [XML <param><value><-></value></param> XML] ] map
+ [XML <params><-></params> XML] ;
: method-call ( name seq -- xml )
- params [ "methodName" build-tag ] dip
- 2array "methodCall" build-tag* build-xml ;
+ params
+ <XML
+ <methodCall>
+ <methodName><-></methodName>
+ <->
+ </methodCall>
+ XML> ;
: return-params ( seq -- xml )
- params "methodResponse" build-tag build-xml ;
+ params <XML <methodResponse><-></methodResponse> XML> ;
: return-fault ( fault-code fault-string -- xml )
[ "faultString" set "faultCode" set ] H{ } make-assoc item>xml
- "value" build-tag "fault" build-tag "methodResponse" build-tag
- build-xml ;
+ <XML
+ <methodResponse>
+ <fault>
+ <value><-></value>
+ </fault>
+ </methodResponse>
+ XML> ;
TUPLE: rpc-method name params ;
: invoke-method ( params method url -- )
[ swap <rpc-method> ] dip post-rpc ;
-
-: put-http-response ( string -- )
- "HTTP/1.1 200 OK\nConnection: close\nContent-Length: " write
- dup length number>string write
- "\nContent-Type: text/xml\nDate: " write
- now timestamp>http-string write "\n\n" write
- write ;
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel namespaces xml.name io.encodings.utf8 xml.elements
+io.encodings.utf16 xml.tokenize xml.state math ascii sequences
+io.encodings.string io.encodings combinators ;
+IN: xml.autoencoding
+
+: continue-make-tag ( str -- tag )
+ parse-name-starting middle-tag end-tag ;
+
+: start-utf16le ( -- tag )
+ utf16le decode-input-if
+ "?\0" expect
+ check instruct ;
+
+: 10xxxxxx? ( ch -- ? )
+ -6 shift 3 bitand 2 = ;
+
+: start<name ( ch -- tag )
+ ascii?
+ [ utf8 decode-input-if next make-tag ] [
+ next
+ [ get-next 10xxxxxx? not ] take-until
+ get-char suffix utf8 decode
+ utf8 decode-input-if next
+ continue-make-tag
+ ] if ;
+
+: start< ( -- tag )
+ get-next {
+ { 0 [ next next start-utf16le ] }
+ { CHAR: ? [ check next next instruct ] } ! XML prolog parsing sets the encoding
+ { CHAR: ! [ check utf8 decode-input next next direct ] }
+ [ check start<name ]
+ } case ;
+
+: skip-utf8-bom ( -- tag )
+ "\u0000bb\u0000bf" expect utf8 decode-input
+ "<" expect check make-tag ;
+
+: decode-expecting ( encoding string -- tag )
+ [ decode-input-if next ] [ expect ] bi* check make-tag ;
+
+: start-utf16be ( -- tag )
+ utf16be "<" decode-expecting ;
+
+: skip-utf16le-bom ( -- tag )
+ utf16le "\u0000fe<" decode-expecting ;
+
+: skip-utf16be-bom ( -- tag )
+ utf16be "\u0000ff<" decode-expecting ;
+
+: start-document ( -- tag )
+ get-char {
+ { CHAR: < [ start< ] }
+ { 0 [ start-utf16be ] }
+ { HEX: EF [ skip-utf8-bom ] }
+ { HEX: FF [ skip-utf16le-bom ] }
+ { HEX: FE [ skip-utf16be-bom ] }
+ { f [ "" ] }
+ [ drop utf8 decode-input-if f ]
+ ! Same problem as with <e`>, in the case of XML chunks?
+ } case check ;
+
--- /dev/null
+Implements the automatic detection of encodings of XML documents
+++ /dev/null
-! Copyright (C) 2008 Daniel Ehrenberg
-! See http://factorcode.org/license.txt for BSD license.
-IN: xml.backend
-
-! A stack of { tag children } pairs
-SYMBOL: xml-stack
! Copyright (C) 2005, 2007 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel sequences unicode.syntax math math.order ;
+USING: kernel sequences unicode.syntax math math.order combinators ;
IN: xml.char-classes
-CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_ ;
+CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
: 1.0name-start? ( char -- ? )
dup 1.0name-start*? [ drop t ]
[ HEX: 2BB HEX: 2C1 between? ] if ;
-CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387 ;
+CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
-CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _ ;
+CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
-CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7 ;
+CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
: name-start? ( 1.0? char -- ? )
swap [ 1.0name-start? ] [ 1.1name-start? ] if ;
: name-char? ( 1.0? char -- ? )
swap [ 1.0name-char? ] [ 1.1name-char? ] if ;
+
+: text? ( 1.0? char -- ? )
+ ! 1.0:
+ ! #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+ ! 1.1:
+ ! [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+ {
+ { [ dup HEX: 20 < ] [ "\t\r\n" member? and ] }
+ { [ nip dup HEX: D800 < ] [ drop t ] }
+ { [ dup HEX: E000 < ] [ drop f ] }
+ [ { HEX: FFFE HEX: FFFF } member? not ]
+ } cond ;
--- /dev/null
+XML-related character classes
--- /dev/null
+USING: help.markup help.syntax sequences strings ;
+IN: xml.data
+
+ABOUT: "xml.data"
+
+ARTICLE: "xml.data" "XML data types"
+"The " { $vocab-link "xml.data" } " vocabulary defines a simple document object model for XML. Everything is simply a tuple and can be manipulated as such."
+{ $subsection { "xml.data" "classes" } }
+{ $subsection { "xml.data" "constructors" } }
+"Simple words for manipulating names:"
+ { $subsection names-match? }
+ { $subsection assure-name }
+"For high-level tools for manipulating XML, see " { $vocab-link "xml.utilities" } ;
+
+ARTICLE: { "xml.data" "classes" } "XML data classes"
+ "Data types that XML documents are made of:"
+ { $subsection name }
+ { $subsection tag }
+ { $subsection contained-tag }
+ { $subsection open-tag }
+ { $subsection xml }
+ { $subsection prolog }
+ { $subsection comment }
+ { $subsection instruction }
+ { $subsection element-decl }
+ { $subsection attlist-decl }
+ { $subsection entity-decl }
+ { $subsection system-id }
+ { $subsection public-id }
+ { $subsection doctype-decl }
+ { $subsection notation-decl } ;
+
+ARTICLE: { "xml.data" "constructors" } "XML data constructors"
+ "These data types are constructed with:"
+ { $subsection <name> }
+ { $subsection <tag> }
+ { $subsection <contained-tag> }
+ { $subsection <xml> }
+ { $subsection <prolog> }
+ { $subsection <comment> }
+ { $subsection <instruction> }
+ { $subsection <simple-name> }
+ { $subsection <element-decl> }
+ { $subsection <attlist-decl> }
+ { $subsection <entity-decl> }
+ { $subsection <system-id> }
+ { $subsection <public-id> }
+ { $subsection <doctype-decl> }
+ { $subsection <notation-decl> } ;
+
+HELP: tag
+{ $class-description "Tuple representing an XML tag, delegating to a " { $link
name } ", containing the slots attrs (an alist of names to strings) and children (a sequence). Tags implement the sequence protocol by acting like a sequence of its children, and the assoc protocol by acting like its attributes." }
+{ $see-also <tag> name contained-tag xml } ;
+
+HELP: <tag>
+{ $values { "name" "an XML tag name" }
+ { "attrs" "an alist of names to strings" }
+ { "children" sequence }
+ { "tag" tag } }
+{ $description "Constructs an XML " { $link tag } " with the name (not a string) and tag attributes specified in attrs and children specified." }
+{ $see-also tag <contained-tag> } ;
+
+HELP: name
+{ $class-description "Represents an XML name, with the fields space (a string representing the namespace, as written in the document), main (a string of the actual name of the tag) and url (a string of the URL that the namespace points to)." }
+{ $see-also <name> tag } ;
+
+HELP: <name>
+{ $values { "space" "a string" } { "main" "a string" } { "url" "a string" }
+ { "name" "an XML tag name" } }
+{ $description "Creates a name tuple with the namespace prefix space, the given main part of the name, and the namespace URL given by url." }
+{ $see-also name <tag> } ;
+
+HELP: contained-tag
+{ $class-description "This is a subclass of " { $link tag } " consisting of tags with no body, like " { $snippet "<a/>" } "." }
+{ $see-also tag <contained-tag> } ;
+
+HELP: <contained-tag>
+{ $values { "name" "an XML tag name" }
+ { "attrs" "an alist from names to strings" }
+ { "tag" tag } }
+{ $description "Creates an empty tag (like " { $snippet "<a/>" } ") with the specified name and tag attributes." }
+{ $see-also contained-tag <tag> } ;
+
+HELP: xml
+{ $class-description "Tuple representing an XML document, delegating to the main tag, containing the fields prolog (the header " { $snippet "<?xml...?>" } "), before (whatever comes between the prolog and the main tag) and after (whatever comes after the main tag)." }
+{ $see-also <xml> tag prolog } ;
+
+HELP: <xml>
+{ $values { "prolog" "an XML prolog" } { "before" "a sequence of XML elements" }
+{ "body" tag } { "after" "a sequence of XML elements" } { "xml" "an XML document" } }
+{ $description "creates an XML document, delegating to the main tag, with the specified prolog, before, and after" }
+{ $see-also xml <tag> } ;
+
+HELP: prolog
+{ $class-description "represents an XML prolog, with the tuple fields version (containing \"1.0\" or \"1.1\"), encoding (a string representing the encoding type), and standalone (t or f, whether the document is standalone without external entities)" }
+{ $see-also <prolog> xml } ;
+
+HELP: <prolog>
+{ $values { "version" "a string, 1.0 or 1.1" }
+{ "encoding" "a string" } { "standalone" "a boolean" } { "prolog" "an XML prolog" } }
+{ $description "creates an XML prolog tuple" }
+{ $see-also prolog <xml> } ;
+
+HELP: comment
+{ $class-description "represents a comment in XML. Has one slot, text, which contains the string of the comment" }
+{ $see-also <comment> } ;
+
+HELP: <comment>
+{ $values { "text" "a string" } { "comment" "a comment" } }
+{ $description "creates an XML comment tuple" }
+{ $see-also comment } ;
+
+HELP: instruction
+{ $class-description "represents an XML instruction, such as <?xsl stylesheet='foo.xml'?>. Contains one slot, text, which contains the string between the question marks." }
+{ $see-also <instruction> } ;
+
+HELP: <instruction>
+{ $values { "text" "a string" } { "instruction" "an XML instruction" } }
+{ $description "creates an XML parsing instruction, such as <?xsl stylesheet='foo.xml'?>." }
+{ $see-also instruction } ;
+
+HELP: opener
+{ $class-description "describes an opening tag, like <a>. Contains two slots, name and attrs containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }
+{ $see-also closer contained } ;
+
+HELP: closer
+{ $class-description "describes a closing tag, like </a>. Contains one slot, name, containing the tag's name. Usually, the name-url will be f." }
+{ $see-also opener contained } ;
+
+HELP: contained
+{ $class-description "represents a self-closing tag, like <a/>. Contains two slots, name and attrs containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }
+{ $see-also opener closer } ;
+
+HELP: open-tag
+{ $class-description "represents a tag that does have children, ie is not a contained tag" }
+{ $notes "the constructor used for this class is simply " { $link <tag> } "." }
+{ $see-also tag contained-tag } ;
+
+HELP: names-match?
+{ $values { "name1" "a name" } { "name2" "a name" } { "?" "t or f" } }
+{ $description "checks to see if the two names match, that is, if all fields are equal, ignoring fields whose value is f in either name." }
+{ $example "USING: prettyprint xml.data ;" "T{ name f \"rpc\" \"methodCall\" f } T{ name f f \"methodCall\" \"http://www.xmlrpc.org/\" } names-match? ." "t" }
+{ $see-also name } ;
+
+HELP: assure-name
+{ $values { "string/name" "a string or a name" } { "name" "a name" } }
+{ $description "Converts a string into an XML name, if it is not already a name." } ;
+
+HELP: <simple-name>
+{ $values { "string" string } { "name" name } }
+{ $description "Converts a string into an XML name with an empty prefix and URL." } ;
+
+HELP: element-decl
+{ $class-description "Describes the class of element declarations, like <!ELEMENT greeting (#PCDATA)>." } ;
+
+HELP: <element-decl>
+{ $values { "name" name } { "content-spec" string } { "element-decl" element-decl } }
+{ $description "Creates an element declaration object, of the class " { $link element-decl } } ;
+
+HELP: attlist-decl
+{ $class-description "Describes the class of attribute list declarations, like " { $snippet "<!ATTLIST pre xml:space (preserve) #FIXED 'preserve'>" } "." } ;
+
+HELP: <attlist-decl>
+{ $values { "name" name } { "att-defs" string } { "attlist-decl" attlist-decl } }
+{ $description "Creates an attribute list declaration object, of the class " { $link attlist-decl } } ;
+
+HELP: entity-decl
+{ $class-description "Describes the class of entity declarations, like " { $snippet "<!ENTITY foo 'bar'>" } "." } ;
+
+HELP: <entity-decl>
+{ $values { "name" name } { "def" string } { "pe?" "t or f" } { "entity-decl" entity-decl } }
+{ $description "Creates an entity declaration object, of the class " { $link entity-decl } ". The pe? slot should be t if the object is a DTD-internal entity, like " { $snippet "<!ENTITY % foo 'bar'>" } " and f if the object is like " { $snippet "<!ENTITY foo 'bar'>" } ", that is, it can be used outside of the DTD." } ;
+
+HELP: system-id
+{ $class-description "Describes the class of system identifiers within an XML DTD directive, such as " { $snippet "<!DOCTYPE greeting " { $emphasis "SYSTEM 'hello.dtd'" } ">" } } ;
+
+HELP: <system-id>
+{ $values { "system-literal" string } { "system-id" system-id } }
+{ $description "Constructs a " { $link system-id } " tuple." } ;
+
+HELP: public-id
+{ $class-description "Describes the class of public identifiers within an XML DTD directive, such as " { $snippet "<!DOCTYPE open-hatch " { $emphasis "PUBLIC '-//Textuality//TEXT Standard open-hatch boilerplate//EN' 'http://www.textuality.com/boilerplate/OpenHatch.xml'" } ">" } } ;
+
+HELP: <public-id>
+{ $values { "pubid-literal" string } { "system-literal" string } { "public-id" public-id } }
+{ $description "Constructs a " { $link public-id } " tuple." } ;
+
+HELP: notation-decl
+{ $class-description "Describes the class of notation declarations, like " { $snippet "<!NOTATION jpg SYSTEM './jpgviewer'>" } "." } ;
+
+HELP: <notation-decl>
+{ $values { "name" string } { "id" id } { "notation-decl" notation-decl } }
+{ $description "Creates a notation declaration object, of the class " { $link notation-decl } "." } ;
+
+HELP: doctype-decl
+{ $class-description "Describes the class of doctype declarations." } ;
+
+HELP: <doctype-decl>
+{ $values { "name" name } { "external-id" id } { "internal-subset" sequence } { "doctype-decl" doctype-decl } }
+{ $description "Creates a new doctype declaration object, of the class " { $link doctype-decl } ". Only one of external-id or internal-subset will be non-null." } ;
-! Copyright (C) 2005, 2006 Daniel Ehrenberg
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences sequences.private assocs arrays
delegate.protocols delegate vectors accessors multiline
-macros words quotations combinators slots fry ;
+macros words quotations combinators slots fry strings ;
IN: xml.data
-TUPLE: name space main url ;
+TUPLE: interpolated var ;
+C: <interpolated> interpolated
+
+UNION: nullable-string string POSTPONE: f ;
+
+TUPLE: name
+ { space nullable-string }
+ { main string }
+ { url nullable-string } ;
C: <name> name
: ?= ( object/f object/f -- ? )
[ [ main>> ] bi@ ?= ] 2tri and and ;
: <simple-name> ( string -- name )
+ "" swap f <name> ;
+
+: <null-name> ( string -- name )
f swap f <name> ;
: assure-name ( string/name -- name )
- dup name? [ <simple-name> ] unless ;
-
-TUPLE: opener name attrs ;
-C: <opener> opener
-
-TUPLE: closer name ;
-C: <closer> closer
-
-TUPLE: contained name attrs ;
-C: <contained> contained
-
-TUPLE: comment text ;
-C: <comment> comment
-
-TUPLE: directive ;
-
-TUPLE: element-decl < directive name content-spec ;
-C: <element-decl> element-decl
+ dup name? [ <null-name> ] unless ;
-TUPLE: attlist-decl < directive name att-defs ;
-C: <attlist-decl> attlist-decl
-
-TUPLE: entity-decl < directive name def ;
-C: <entity-decl> entity-decl
-
-TUPLE: system-id system-literal ;
-C: <system-id> system-id
-
-TUPLE: public-id pubid-literal system-literal ;
-C: <public-id> public-id
-
-TUPLE: doctype-decl < directive name external-id internal-subset ;
-C: <doctype-decl> doctype-decl
-
-TUPLE: instruction text ;
-C: <instruction> instruction
-
-TUPLE: prolog version encoding standalone ;
-C: <prolog> prolog
-
-TUPLE: attrs alist ;
+TUPLE: attrs { alist sequence } ;
C: <attrs> attrs
: attr@ ( key alist -- index {key,value} )
M: attrs clear-assoc
f >>alist drop ;
M: attrs delete-at
- tuck attr@ drop [ swap alist>> delete-nth ] [ drop ] if* ;
+ [ nip ] [ attr@ drop ] 2bi
+ [ swap alist>> delete-nth ] [ drop ] if* ;
M: attrs clone
alist>> clone <attrs> ;
INSTANCE: attrs assoc
-TUPLE: tag name attrs children ;
+TUPLE: opener { name name } { attrs attrs } ;
+C: <opener> opener
+
+TUPLE: closer { name name } ;
+C: <closer> closer
+
+TUPLE: contained { name name } { attrs attrs } ;
+C: <contained> contained
+
+TUPLE: comment { text string } ;
+C: <comment> comment
+
+TUPLE: directive ;
+
+TUPLE: element-decl < directive
+ { name string }
+ { content-spec string } ;
+C: <element-decl> element-decl
+
+TUPLE: attlist-decl < directive
+ { name string }
+ { att-defs string } ;
+C: <attlist-decl> attlist-decl
+
+UNION: boolean t POSTPONE: f ;
+
+TUPLE: entity-decl < directive
+ { name string }
+ { def string }
+ { pe? boolean } ;
+C: <entity-decl> entity-decl
+
+TUPLE: system-id { system-literal string } ;
+C: <system-id> system-id
+
+TUPLE: public-id { pubid-literal string } { system-literal string } ;
+C: <public-id> public-id
+
+UNION: id system-id public-id POSTPONE: f ;
+
+TUPLE: dtd
+ { directives sequence }
+ { entities assoc }
+ { parameter-entities assoc } ;
+C: <dtd> dtd
+
+UNION: dtd/f dtd POSTPONE: f ;
+
+TUPLE: doctype-decl < directive
+ { name string }
+ { external-id id }
+ { internal-subset dtd/f } ;
+C: <doctype-decl> doctype-decl
+
+TUPLE: notation-decl < directive
+ { name string }
+ { id string } ;
+C: <notation-decl> notation-decl
+
+TUPLE: instruction { text string } ;
+C: <instruction> instruction
+
+TUPLE: prolog
+ { version string }
+ { encoding string }
+ { standalone boolean } ;
+C: <prolog> prolog
+
+TUPLE: tag
+ { name name }
+ { attrs attrs }
+ { children sequence } ;
: <tag> ( name attrs children -- tag )
[ assure-name ] [ T{ attrs } assoc-like ] [ ] tri*
M: tag clone
tag clone-slots ;
-TUPLE: xml prolog before body after ;
+TUPLE: xml
+ { prolog prolog }
+ { before sequence }
+ { body tag }
+ { after sequence } ;
C: <xml> xml
CONSULT: sequence-protocol xml body>> ;
PREDICATE: contained-tag < tag children>> not ;
PREDICATE: open-tag < tag children>> ;
+
+UNION: xml-data
+ tag comment string directive instruction ;
+
+TUPLE: unescaped string ;
+C: <unescaped> unescaped
--- /dev/null
+Contains XML data types and basic tools for manipulation
--- /dev/null
+collections
+assocs
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.markup help.syntax ;
+IN: xml.dispatch
+
+ABOUT: "xml.dispatch"
+
+ARTICLE: "xml.dispatch" "Dispatch on XML tag names"
+"Two parsing words define a system, analogous to generic words, for processing XML. A word can dispatch off the name of the tag that is passed to it. To define such a word, use"
+{ $subsection POSTPONE: PROCESS: }
+"and to define a new 'method' for this word, use"
+{ $subsection POSTPONE: TAG: } ;
+
+HELP: PROCESS:
+{ $syntax "PROCESS: word" }
+{ $values { "word" "a new word to define" } }
+{ $description "creates a new word to process XML tags" }
+{ $see-also POSTPONE: TAG: } ;
+
+HELP: TAG:
+{ $syntax "TAG: tag word definition... ;" }
+{ $values { "tag" "an xml tag name" } { "word" "an XML process" } }
+{ $description "defines what a process should do when it encounters a specific tag" }
+{ $examples { $code "PROCESS: x ( tag -- )\nTAG: a x drop \"hi\" write ;" } }
+{ $see-also POSTPONE: PROCESS: } ;
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: xml io kernel math sequences strings xml.utilities
+tools.test math.parser xml.dispatch ;
+IN: xml.dispatch.tests
+
+PROCESS: calculate ( tag -- n )
+
+: calc-2children ( tag -- n n )
+ children-tags first2 [ calculate ] dip calculate ;
+
+TAG: number calculate
+ children>string string>number ;
+TAG: add calculate
+ calc-2children + ;
+TAG: minus calculate
+ calc-2children - ;
+TAG: times calculate
+ calc-2children * ;
+TAG: divide calculate
+ calc-2children / ;
+TAG: neg calculate
+ children-tags first calculate neg ;
+
+: calc-arith ( string -- n )
+ string>xml first-child-tag calculate ;
+
+[ 32 ] [
+ "<math><times><add><number>1</number><number>3</number></add><neg><number>-8</number></neg></times></math>"
+ calc-arith
+] unit-test
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: words assocs kernel accessors parser sequences summary
+lexer splitting fry ;
+IN: xml.dispatch
+
+TUPLE: process-missing process tag ;
+M: process-missing summary
+ drop "Tag not implemented on process" ;
+
+: run-process ( tag word -- )
+ 2dup "xtable" word-prop
+ [ dup main>> ] dip at* [ 2nip call ] [
+ drop \ process-missing boa throw
+ ] if ;
+
+: PROCESS:
+ CREATE
+ dup H{ } clone "xtable" set-word-prop
+ dup '[ _ run-process ] define ; parsing
+
+: TAG:
+ scan scan-word
+ parse-definition
+ swap "xtable" word-prop
+ rot "/" split [ [ 2dup ] dip swap set-at ] each 2drop ;
+ parsing
--- /dev/null
+'Generic words' that dispatch on XML tag names
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg, Slava Pestov
+! See http://factorcode.org/license.txt for BSD license.
+USING: xml.tokenize xml.data xml.state kernel sequences ascii
+fry xml.errors combinators hashtables namespaces xml.entities
+strings xml.name ;
+IN: xml.dtd
+
+: take-decl-contents ( -- first second )
+ pass-blank take-word pass-blank ">" take-string ;
+
+: take-element-decl ( -- element-decl )
+ take-decl-contents <element-decl> ;
+
+: take-attlist-decl ( -- attlist-decl )
+ take-decl-contents <attlist-decl> ;
+
+: take-notation-decl ( -- notation-decl )
+ take-decl-contents <notation-decl> ;
+
+UNION: dtd-acceptable
+ directive comment instruction ;
+
+: take-entity-def ( var -- entity-name entity-def )
+ [
+ take-word pass-blank get-char {
+ { CHAR: ' [ parse-quote ] }
+ { CHAR: " [ parse-quote ] }
+ [ drop take-external-id close ]
+ } case
+ ] dip '[ swap _ [ ?set-at ] change ] 2keep ;
+
+: take-entity-decl ( -- entity-decl )
+ pass-blank get-char {
+ { CHAR: % [ next pass-blank pe-table take-entity-def t ] }
+ [ drop extra-entities take-entity-def f ]
+ } case close <entity-decl> ;
+
+: take-inner-directive ( string -- directive )
+ {
+ { "ELEMENT" [ take-element-decl ] }
+ { "ATTLIST" [ take-attlist-decl ] }
+ { "ENTITY" [ take-entity-decl ] }
+ { "NOTATION" [ take-notation-decl ] }
+ [ bad-directive ]
+ } case ;
--- /dev/null
+Implements the parsing of directives in DTDs
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel namespaces xml.tokenize xml.state xml.name
+xml.data accessors arrays make xml.char-classes fry assocs sequences
+math xml.errors sets combinators io.encodings io.encodings.iana
+unicode.case xml.dtd strings xml.entities unicode.categories ;
+IN: xml.elements
+
+: take-interpolated ( quot -- interpolated )
+ interpolating? get [
+ drop get-char CHAR: > =
+ [ next f ]
+ [ "->" take-string [ blank? ] trim ]
+ if <interpolated>
+ ] [ call ] if ; inline
+
+: interpolate-quote ( -- interpolated )
+ [ quoteless-attr ] take-interpolated ;
+
+: parse-attr ( -- )
+ parse-name pass-blank "=" expect pass-blank
+ get-char CHAR: < =
+ [ "<-" expect interpolate-quote ]
+ [ t parse-quote* ] if 2array , ;
+
+: start-tag ( -- name ? )
+ #! Outputs the name and whether this is a closing tag
+ get-char CHAR: / = dup [ next ] when
+ parse-name swap ;
+
+: (middle-tag) ( -- )
+ pass-blank version=1.0? get-char name-start?
+ [ parse-attr (middle-tag) ] when ;
+
+: assure-no-duplicates ( attrs-alist -- attrs-alist )
+ H{ } clone 2dup '[ swap _ push-at ] assoc-each
+ [ nip length 2 >= ] assoc-filter >alist
+ [ first first2 duplicate-attr ] unless-empty ;
+
+: middle-tag ( -- attrs-alist )
+ ! f make will make a vector if it has any elements
+ [ (middle-tag) ] f make pass-blank
+ assure-no-duplicates ;
+
+: end-tag ( name attrs-alist -- tag )
+ tag-ns pass-blank get-char CHAR: / =
+ [ pop-ns <contained> next ">" expect ]
+ [ depth inc <opener> close ] if ;
+
+: take-comment ( -- comment )
+ "--" expect
+ "--" take-string
+ <comment>
+ ">" expect ;
+
+: assure-no-extra ( seq -- )
+ [ first ] map {
+ T{ name f "" "version" f }
+ T{ name f "" "encoding" f }
+ T{ name f "" "standalone" f }
+ } diff
+ [ extra-attrs ] unless-empty ;
+
+: good-version ( version -- version )
+ dup { "1.0" "1.1" } member? [ bad-version ] unless ;
+
+: prolog-version ( alist -- version )
+ T{ name f "" "version" f } swap at
+ [ good-version ] [ versionless-prolog ] if* ;
+
+: prolog-encoding ( alist -- encoding )
+ T{ name f "" "encoding" f } swap at "UTF-8" or ;
+
+: yes/no>bool ( string -- t/f )
+ {
+ { "yes" [ t ] }
+ { "no" [ f ] }
+ [ not-yes/no ]
+ } case ;
+
+: prolog-standalone ( alist -- version )
+ T{ name f "" "standalone" f } swap at
+ [ yes/no>bool ] [ f ] if* ;
+
+: prolog-attrs ( alist -- prolog )
+ [ prolog-version ]
+ [ prolog-encoding ]
+ [ prolog-standalone ]
+ tri <prolog> ;
+
+SYMBOL: string-input?
+: decode-input-if ( encoding -- )
+ string-input? get [ drop ] [ decode-input ] if ;
+
+: parse-prolog ( -- prolog )
+ pass-blank middle-tag "?>" expect
+ dup assure-no-extra prolog-attrs
+ dup encoding>> dup "UTF-16" =
+ [ drop ] [ name>encoding [ decode-input-if ] when* ] if
+ dup prolog-data set ;
+
+: instruct ( -- instruction )
+ take-name {
+ { [ dup "xml" = ] [ drop parse-prolog ] }
+ { [ dup >lower "xml" = ] [ capitalized-prolog ] }
+ { [ dup valid-name? not ] [ bad-name ] }
+ [ "?>" take-string append <instruction> ]
+ } cond ;
+
+: take-cdata ( -- string )
+ depth get zero? [ bad-cdata ] when
+ "[CDATA[" expect "]]>" take-string ;
+
+DEFER: make-tag ! Is this unavoidable?
+
+: expand-pe ( -- ) ; ! Make this run the contents of the pe within a DOCTYPE
+
+: dtd-loop ( -- )
+ pass-blank get-char {
+ { CHAR: ] [ next ] }
+ { CHAR: % [ expand-pe ] }
+ { CHAR: < [
+ next make-tag dup dtd-acceptable?
+ [ bad-doctype ] unless , dtd-loop
+ ] }
+ { f [ ] }
+ [ 1string bad-doctype ]
+ } case ;
+
+: take-internal-subset ( -- dtd )
+ [
+ H{ } clone pe-table set
+ t in-dtd? set
+ dtd-loop
+ pe-table get
+ ] { } make swap extra-entities get swap <dtd> ;
+
+: take-optional-id ( -- id/f )
+ get-char "SP" member?
+ [ take-external-id ] [ f ] if ;
+
+: take-internal ( -- dtd/f )
+ get-char CHAR: [ =
+ [ next take-internal-subset ] [ f ] if ;
+
+: take-doctype-decl ( -- doctype-decl )
+ pass-blank take-name
+ pass-blank take-optional-id
+ pass-blank take-internal
+ <doctype-decl> close ;
+
+: take-directive ( -- doctype )
+ take-name dup "DOCTYPE" =
+ [ drop take-doctype-decl ] [
+ in-dtd? get
+ [ take-inner-directive ]
+ [ misplaced-directive ] if
+ ] if ;
+
+: direct ( -- object )
+ get-char {
+ { CHAR: - [ take-comment ] }
+ { CHAR: [ [ take-cdata ] }
+ [ drop take-directive ]
+ } case ;
+
+: normal-tag ( -- tag )
+ start-tag
+ [ dup add-ns pop-ns <closer> depth dec close ]
+ [ middle-tag end-tag ] if ;
+
+: interpolate-tag ( -- interpolated )
+ [ "-" bad-name ] take-interpolated ;
+
+: make-tag ( -- tag )
+ {
+ { [ get-char dup CHAR: ! = ] [ drop next direct ] }
+ { [ dup CHAR: ? = ] [ drop next instruct ] }
+ { [ dup CHAR: - = ] [ drop next interpolate-tag ] }
+ [ drop normal-tag ]
+ } cond ;
--- /dev/null
+Implements the parsing of XML tags
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.markup help.syntax ;
+IN: xml.entities
+
+ABOUT: "xml.entities"
+
+ARTICLE: "xml.entities" "XML entities"
+ "When XML is parsed, entities like &foo; are replaced with the characters they represent. A few entities like & and < are defined by default, but more are available, and the set of entities can be customized. Below are some words involved in XML entities, defined in the vocabulary 'entities':"
+ { $subsection entities }
+ { $subsection with-entities }
+"For entities used in HTML/XHTML, see " { $vocab-link "xml.entities.html" } ;
+
+HELP: entities
+{ $description "a hash table from default XML entity names (like & and <) to the characters they represent. This is automatically included when parsing any XML document." }
+{ $see-also with-entities } ;
+
+HELP: with-entities
+{ $values { "entities" "a hash table of strings to chars" }
+ { "quot" "a quotation ( -- )" } }
+{ $description "calls the quotation using the given table of entity values (symbolizing, eg, that &foo; represents CHAR: a) on top of the default XML entities" } ;
+
! Copyright (C) 2005, 2006 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces make kernel assocs sequences fry values
-io.files io.encodings.binary ;
+io.files io.encodings.binary xml.state ;
IN: xml.entities
: entities-out
{ CHAR: & "&" }
{ CHAR: ' "'" }
{ CHAR: " """ }
+ { CHAR: < "<" }
} ;
: escape-string-by ( str table -- escaped )
{ "quot" CHAR: " }
} ;
-SYMBOL: extra-entities
-
: with-entities ( entities quot -- )
[ swap extra-entities set call ] with-scope ; inline
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.markup help.syntax xml.entities ;
+IN: xml.entities.html
+
+ARTICLE: "xml.entities.html" "HTML entities"
+{ $vocab-link "xml.entities.html" } " defines words for using entities defined in HTML/XHTML."
+ { $subsection html-entities }
+ { $subsection with-html-entities } ;
+
+HELP: html-entities
+{ $description "a hash table from HTML entity names to their character values" }
+{ $see-also entities with-html-entities } ;
+
+HELP: with-html-entities
+{ $values { "quot" "a quotation ( -- )" } }
+{ $description "calls the given quotation using HTML entity values" }
+{ $see-also html-entities with-entities } ;
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: assocs io.encodings.binary io.files kernel namespaces sequences
-values xml xml.entities ;
+values xml xml.entities accessors xml.state ;
IN: xml.entities.html
VALUE: html-entities
: read-entities-file ( file -- table )
- f swap binary <file-reader>
- [ 2drop extra-entities get ] sax ;
+ file>dtd entities>> ;
: get-html ( -- table )
{ "lat1" "special" "symbol" } [
--- /dev/null
+Contains built-in XML entities
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.markup help.syntax ;
+IN: xml.errors
+
+HELP: multitags
+{ $class-description "XML parsing error describing the case where there is more than one main tag in a document. Contains no slots" } ;
+
+HELP: notags
+{ $class-description "XML parsing error describing the case where an XML document contains no main tag, or any tags at all" } ;
+
+HELP: extra-attrs
+{ $class-description "XML parsing error describing the case where the XML prolog (" { $snippet "<?xml ...?>" } ") contains attributes other than the three allowed ones, standalone, version and encoding. Contains one slot, attrs, which is a hashtable of all the extra attributes' names. Delegates to " { $link xml-error-at } "." } ;
+
+HELP: nonexist-ns
+{ $class-description "XML parsing error describing the case where a namespace doesn't exist but it is used in a tag. Contains one slot, name, which contains the name of the undeclared namespace, and delegates to " { $link xml-error-at } "." } ;
+
+HELP: not-yes/no
+{ $class-description "XML parsing error used to describe the case where standalone is set in the XML prolog to something other than 'yes' or 'no'. Delegates to " { $link xml-error-at } " and contains one slot, text, which contains offending value." } ;
+
+HELP: unclosed
+{ $class-description "XML parsing error used to describe the case where the XML document contains tags which are not closed by the end of the document. Contains one slot, tags, a sequence of names." } ;
+
+HELP: mismatched
+{ $class-description "XML parsing error describing mismatched tags, eg " { $snippet "<a></c>" } ". Contains two slots: open is the name of the opening tag and close is the name of the closing tag. Delegates to " { $link xml-error-at } " showing the location of the closing tag" } ;
+
+HELP: expected
+{ $class-description "XML parsing error describing when an expected token was not present. Delegates to " { $link xml-error-at } ". Contains two slots, should-be, which has the expected string, and was, which has the actual string." } ;
+
+HELP: no-entity
+{ $class-description "XML parsing error describing the use of an undefined entity in a case where standalone is marked yes. Delegates to " { $link xml-error-at } ". Contains one slot, thing, containing a string representing the entity." } ;
+
+
+HELP: pre/post-content
+{ $class-description "Describes the error where a non-whitespace string is used before or after the main tag in an XML document. Contains two slots: string contains the offending string, and pre? is t if it occurred before the main tag and f if it occurred after" } ;
+
+HELP: unclosed-quote
+{ $class-description "Describes the error where a quotation for an attribute value is opened but not closed before the end of the document." } ;
+
+HELP: bad-name
+{ $class-description "Describes the error where a name is used, for example in an XML tag or attribute key, which is invalid." } ;
+
+HELP: quoteless-attr
+{ $class-description "Describes the error where an attribute of an XML tag is missing quotes around a value." } ;
+
+HELP: disallowed-char
+{ $class-description "Describes the error where a disallowed character occurs in an XML document." } ;
+
+HELP: missing-close
+{ $class-description "Describes the error where a particular closing token is missing." } ;
+
+HELP: unexpected-end
+{ $class-description "Describes the error where a document unexpectedly ends, and the XML parser expected it to continue." } ;
+
+HELP: duplicate-attr
+{ $class-description "Describes the error where there is more than one attribute of the same key." } ;
+
+HELP: bad-cdata
+{ $class-description "Describes the error where CDATA is used outside of the main tag of an XML document." } ;
+
+HELP: text-w/]]>
+{ $class-description "Describes the error where a text node contains the literal string " { $snippet "]]>" } " which is disallowed." } ;
+
+HELP: attr-w/<
+{ $class-description "Describes the error where an attribute value contains the literal character " { $snippet "<" } " which is disallowed." } ;
+
+HELP: misplaced-directive
+{ $class-description "Describes the error where an internal DTD directive is used outside of a DOCTYPE or DTD file, or where a DOCTYPE occurs somewhere other than before the main tag of an XML document." } ;
+
+HELP: xml-error
+{ $class-description "The union class comprising all XML parsing errors; every error thrown while parsing an XML document is an instance of this class." } ;
+
+ARTICLE: "xml.errors" "XML parsing errors"
+"The " { $vocab-link "xml.errors" } " vocabulary provides a rich and highly inspectable set of parsing errors. All XML errors are described by the union class " { $link xml-error } " but there are many classes contained in that:"
+ { $subsection multitags }
+ { $subsection notags }
+ { $subsection extra-attrs }
+ { $subsection nonexist-ns }
+ { $subsection not-yes/no }
+ { $subsection unclosed }
+ { $subsection mismatched }
+ { $subsection expected }
+ { $subsection no-entity }
+ { $subsection pre/post-content }
+ { $subsection unclosed-quote }
+ { $subsection bad-name }
+ { $subsection quoteless-attr }
+ { $subsection disallowed-char }
+ { $subsection missing-close }
+ { $subsection unexpected-end }
+ { $subsection duplicate-attr }
+ { $subsection bad-cdata }
+ { $subsection text-w/]]> }
+ { $subsection attr-w/< }
+ { $subsection misplaced-directive }
+ "Additionally, most of these errors are a kind of " { $link xml-error-at } " which provides more information"
+ $nl
+ "Note that, in parsing an XML document, only the first error is reported." ;
+
+ABOUT: "xml.errors"
USING: continuations xml xml.errors tools.test kernel arrays
-xml.data state-parser quotations fry ;
+xml.data quotations fry ;
IN: xml.errors.tests
: xml-error-test ( expected-error xml-string -- )
xml-error-test
T{ pre/post-content f "x" t } "x<y/>" xml-error-test
T{ versionless-prolog f 1 8 } "<?xml?><x/>" xml-error-test
-T{ bad-instruction f 1 11 T{ instruction f "xsl" } }
- "<x><?xsl?></x>" xml-error-test
T{ unclosed-quote f 1 13 } "<x value='/>" xml-error-test
T{ bad-name f 1 3 "-" } "<-/>" xml-error-test
-T{ quoteless-attr f 1 10 } "<x value=3/>" xml-error-test
\ No newline at end of file
+T{ quoteless-attr f 1 12 } "<x value=<->/>" xml-error-test
+T{ quoteless-attr f 1 10 } "<x value=3/>" xml-error-test
+T{ attr-w/< f 1 11 } "<x value='<'/>" xml-error-test
+T{ text-w/]]> f 1 6 } "<x>]]></x>" xml-error-test
+T{ duplicate-attr f 1 21 T{ name { space "" } { main "this" } } V{ "a" "b" } } "<x this='a' this='b'/>" xml-error-test
+T{ bad-cdata f 1 3 } "<![CDATA[]]><x/>" xml-error-test
+T{ bad-cdata f 1 7 } "<x/><![CDATA[]]>" xml-error-test
+T{ pre/post-content f "&" t } " <x/>" xml-error-test
+T{ bad-doctype f 1 17 "a" } "<!DOCTYPE foo [ a ]><x/>" xml-error-test
+T{ bad-doctype f 1 22 T{ opener { name T{ name f "" "foo" "" } } { attrs T{ attrs } } } } "<!DOCTYPE foo [ <foo> ]><x/>" xml-error-test
+T{ disallowed-char f 1 3 1 } "<x>\u000001</x>" xml-error-test
+T{ missing-close f 1 9 } "<!-- foo" xml-error-test
+T{ misplaced-directive f 1 9 "ENTITY" } "<!ENTITY foo 'bar'><x/>" xml-error-test
! Copyright (C) 2005, 2006 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: xml.data xml.writer kernel generic io prettyprint math
-debugger sequences state-parser accessors summary
-namespaces io.streams.string xml.backend ;
+debugger sequences xml.state accessors summary
+namespaces io.streams.string ;
IN: xml.errors
+TUPLE: xml-error-at line column ;
+
+: xml-error-at ( class -- obj )
+ new
+ get-line >>line
+ get-column >>column ;
+M: xml-error-at summary ( obj -- str )
+ [
+ "XML parsing error" print
+ "Line: " write dup line>> .
+ "Column: " write column>> .
+ ] with-string-writer ;
+
+TUPLE: expected < xml-error-at should-be was ;
+: expected ( should-be was -- * )
+ \ expected xml-error-at
+ swap >>was
+ swap >>should-be throw ;
+M: expected summary ( obj -- str )
+ [
+ dup call-next-method write
+ "Token expected: " write dup should-be>> print
+ "Token present: " write was>> print
+ ] with-string-writer ;
+
+TUPLE: unexpected-end < xml-error-at ;
+: unexpected-end ( -- * ) \ unexpected-end xml-error-at throw ;
+M: unexpected-end summary ( obj -- str )
+ [
+ call-next-method write
+ "File unexpectedly ended." print
+ ] with-string-writer ;
+
+TUPLE: missing-close < xml-error-at ;
+: missing-close ( -- * ) \ missing-close xml-error-at throw ;
+M: missing-close summary ( obj -- str )
+ [
+ call-next-method write
+ "Missing closing token." print
+ ] with-string-writer ;
+
+TUPLE: disallowed-char < xml-error-at char ;
+
+: disallowed-char ( char -- * )
+ \ disallowed-char xml-error-at swap >>char throw ;
+
+M: disallowed-char summary
+ [ call-next-method ]
+ [ char>> "Disallowed character in XML document: " swap suffix ] bi
+ append ;
+
ERROR: multitags ;
M: multitags summary ( obj -- str )
" the main tag." print
] with-string-writer ;
-TUPLE: no-entity < parsing-error thing ;
+TUPLE: no-entity < xml-error-at thing ;
: no-entity ( string -- * )
- \ no-entity parsing-error swap >>thing throw ;
+ \ no-entity xml-error-at swap >>thing throw ;
M: no-entity summary ( obj -- str )
[
"Entity does not exist: &" write thing>> write ";" print
] with-string-writer ;
-TUPLE: mismatched < parsing-error open close ;
+TUPLE: mismatched < xml-error-at open close ;
: mismatched ( open close -- * )
- \ mismatched parsing-error swap >>close swap >>open throw ;
+ \ mismatched xml-error-at swap >>close swap >>open throw ;
M: mismatched summary ( obj -- str )
[
"Closing tag: </" write close>> print-name ">" print
] with-string-writer ;
-TUPLE: unclosed < parsing-error tags ;
+TUPLE: unclosed < xml-error-at tags ;
: unclosed ( -- * )
- \ unclosed parsing-error
+ \ unclosed xml-error-at
xml-stack get rest-slice [ first name>> ] map >>tags
throw ;
tags>> [ " <" write print-name ">" print ] each
] with-string-writer ;
-TUPLE: bad-uri < parsing-error string ;
+TUPLE: bad-uri < xml-error-at string ;
: bad-uri ( string -- * )
- \ bad-uri parsing-error swap >>string throw ;
+ \ bad-uri xml-error-at swap >>string throw ;
M: bad-uri summary ( obj -- str )
[
"Bad URI:" print string>> .
] with-string-writer ;
-TUPLE: nonexist-ns < parsing-error name ;
+TUPLE: nonexist-ns < xml-error-at name ;
: nonexist-ns ( name-string -- * )
- \ nonexist-ns parsing-error swap >>name throw ;
+ \ nonexist-ns xml-error-at swap >>name throw ;
M: nonexist-ns summary ( obj -- str )
[
"Namespace " write name>> write " has not been declared" print
] with-string-writer ;
-TUPLE: unopened < parsing-error ; ! this should give which tag was unopened
+TUPLE: unopened < xml-error-at ; ! this should give which tag was unopened
: unopened ( -- * )
- \ unopened parsing-error throw ;
+ \ unopened xml-error-at throw ;
M: unopened summary ( obj -- str )
[
"Closed an unopened tag" print
] with-string-writer ;
-TUPLE: not-yes/no < parsing-error text ;
+TUPLE: not-yes/no < xml-error-at text ;
: not-yes/no ( text -- * )
- \ not-yes/no parsing-error swap >>text throw ;
+ \ not-yes/no xml-error-at swap >>text throw ;
M: not-yes/no summary ( obj -- str )
[
] with-string-writer ;
! this should actually print the names
-TUPLE: extra-attrs < parsing-error attrs ;
+TUPLE: extra-attrs < xml-error-at attrs ;
: extra-attrs ( attrs -- * )
- \ extra-attrs parsing-error swap >>attrs throw ;
+ \ extra-attrs xml-error-at swap >>attrs throw ;
M: extra-attrs summary ( obj -- str )
[
attrs>> .
] with-string-writer ;
-TUPLE: bad-version < parsing-error num ;
+TUPLE: bad-version < xml-error-at num ;
: bad-version ( num -- * )
- \ bad-version parsing-error swap >>num throw ;
+ \ bad-version xml-error-at swap >>num throw ;
M: bad-version summary ( obj -- str )
[
M: notags summary ( obj -- str )
drop "XML document lacks a main tag" ;
-TUPLE: bad-prolog < parsing-error prolog ;
+TUPLE: bad-prolog < xml-error-at prolog ;
: bad-prolog ( prolog -- * )
- \ bad-prolog parsing-error swap >>prolog throw ;
+ \ bad-prolog xml-error-at swap >>prolog throw ;
M: bad-prolog summary ( obj -- str )
[
prolog>> write-prolog nl
] with-string-writer ;
-TUPLE: capitalized-prolog < parsing-error name ;
+TUPLE: capitalized-prolog < xml-error-at name ;
: capitalized-prolog ( name -- capitalized-prolog )
- \ capitalized-prolog parsing-error swap >>name throw ;
+ \ capitalized-prolog xml-error-at swap >>name throw ;
M: capitalized-prolog summary ( obj -- str )
[
" instead of <?xml...?>" print
] with-string-writer ;
-TUPLE: versionless-prolog < parsing-error ;
+TUPLE: versionless-prolog < xml-error-at ;
: versionless-prolog ( -- * )
- \ versionless-prolog parsing-error throw ;
+ \ versionless-prolog xml-error-at throw ;
M: versionless-prolog summary ( obj -- str )
[
"XML prolog lacks a version declaration" print
] with-string-writer ;
-TUPLE: bad-instruction < parsing-error instruction ;
-
-: bad-instruction ( instruction -- * )
- \ bad-instruction parsing-error swap >>instruction throw ;
-
-M: bad-instruction summary ( obj -- str )
- [
- dup call-next-method write
- "Misplaced processor instruction:" print
- instruction>> write-xml-chunk nl
- ] with-string-writer ;
-
-TUPLE: bad-directive < parsing-error dir ;
+TUPLE: bad-directive < xml-error-at dir ;
: bad-directive ( directive -- * )
- \ bad-directive parsing-error swap >>dir throw ;
+ \ bad-directive xml-error-at swap >>dir throw ;
M: bad-directive summary ( obj -- str )
[
dir>> write
] with-string-writer ;
-TUPLE: bad-doctype-decl < parsing-error ;
+TUPLE: bad-decl < xml-error-at ;
-: bad-doctype-decl ( -- * )
- \ bad-doctype-decl parsing-error throw ;
+: bad-decl ( -- * )
+ \ bad-decl xml-error-at throw ;
-M: bad-doctype-decl summary ( obj -- str )
- call-next-method "\nBad DOCTYPE" append ;
+M: bad-decl summary ( obj -- str )
+ call-next-method "\nExtra content in directive" append ;
-TUPLE: bad-external-id < parsing-error ;
+TUPLE: bad-external-id < xml-error-at ;
: bad-external-id ( -- * )
- \ bad-external-id parsing-error throw ;
+ \ bad-external-id xml-error-at throw ;
M: bad-external-id summary ( obj -- str )
call-next-method "\nBad external ID" append ;
-TUPLE: misplaced-directive < parsing-error dir ;
+TUPLE: misplaced-directive < xml-error-at dir ;
: misplaced-directive ( directive -- * )
- \ misplaced-directive parsing-error swap >>dir throw ;
+ \ misplaced-directive xml-error-at swap >>dir throw ;
M: misplaced-directive summary ( obj -- str )
[
dir>> write-xml-chunk nl
] with-string-writer ;
-TUPLE: bad-name < parsing-error name ;
+TUPLE: bad-name < xml-error-at name ;
: bad-name ( name -- * )
- \ bad-name parsing-error swap >>name throw ;
+ \ bad-name xml-error-at swap >>name throw ;
M: bad-name summary ( obj -- str )
[ call-next-method ]
[ "Invalid name: " swap name>> "\n" 3append ]
bi append ;
-TUPLE: unclosed-quote < parsing-error ;
+TUPLE: unclosed-quote < xml-error-at ;
: unclosed-quote ( -- * )
- \ unclosed-quote parsing-error throw ;
+ \ unclosed-quote xml-error-at throw ;
M: unclosed-quote summary
call-next-method
"XML document ends with quote still open\n" append ;
-TUPLE: quoteless-attr < parsing-error ;
+TUPLE: quoteless-attr < xml-error-at ;
: quoteless-attr ( -- * )
- \ quoteless-attr parsing-error throw ;
+ \ quoteless-attr xml-error-at throw ;
M: quoteless-attr summary
call-next-method "Attribute lacks quotes around value\n" append ;
-UNION: xml-parse-error multitags notags extra-attrs nonexist-ns
- not-yes/no unclosed mismatched expected no-entity
- bad-prolog versionless-prolog capitalized-prolog bad-instruction
- bad-directive bad-name unclosed-quote quoteless-attr ;
+TUPLE: attr-w/< < xml-error-at ;
+
+: attr-w/< ( value -- * )
+ \ attr-w/< xml-error-at throw ;
+
+M: attr-w/< summary
+ call-next-method
+ "Attribute value contains literal <" append ;
+
+TUPLE: text-w/]]> < xml-error-at ;
+
+: text-w/]]> ( text -- * )
+ \ text-w/]]> xml-error-at throw ;
+
+M: text-w/]]> summary
+ call-next-method
+ "Text node contains ']]>'" append ;
+
+TUPLE: duplicate-attr < xml-error-at key values ;
+
+: duplicate-attr ( key values -- * )
+ \ duplicate-attr xml-error-at
+ swap >>values swap >>key throw ;
+
+M: duplicate-attr summary
+ call-next-method "\nDuplicate attribute" append ;
+
+TUPLE: bad-cdata < xml-error-at ;
+
+: bad-cdata ( -- * )
+ \ bad-cdata xml-error-at throw ;
+
+M: bad-cdata summary
+ call-next-method "\nCDATA occurs before or after main tag" append ;
+
+TUPLE: not-enough-characters < xml-error-at ;
+: not-enough-characters ( -- * )
+ \ not-enough-characters xml-error-at throw ;
+M: not-enough-characters summary ( obj -- str )
+ [
+ call-next-method write
+ "Not enough characters" print
+ ] with-string-writer ;
+
+TUPLE: bad-doctype < xml-error-at contents ;
+: bad-doctype ( contents -- * )
+ \ bad-doctype xml-error-at swap >>contents throw ;
+M: bad-doctype summary
+ call-next-method "\nDTD contains invalid object" append ;
+
+UNION: xml-error
+ multitags notags pre/post-content xml-error-at ;
--- /dev/null
+XML parsing errors
+++ /dev/null
-Daniel Ehrenberg
+++ /dev/null
-USING: tools.test io.streams.string xml.generator xml.writer accessors ;
-[ "<html><body><a href=\"blah\"/></body></html>" ]
-[ "html" [ "body" [ "a" { { "href" "blah" } } contained*, ] tag, ] make-xml [ body>> write-xml-chunk ] with-string-writer ] unit-test
+++ /dev/null
-! Copyright (C) 2006, 2007 Daniel Ehrenberg
-! See http://factorcode.org/license.txt for BSD license.
-USING: namespaces make kernel xml.data xml.utilities assocs
-sequences ;
-IN: xml.generator
-
-: comment, ( string -- ) <comment> , ;
-: instruction, ( string -- ) <instruction> , ;
-: nl, ( -- ) "\n" , ;
-
-: (tag,) ( name attrs quot -- tag )
- -rot [ V{ } make ] 2dip rot <tag> ; inline
-: tag*, ( name attrs quot -- )
- (tag,) , ; inline
-
-: contained*, ( name attrs -- )
- f <tag> , ;
-
-: tag, ( name quot -- ) f swap tag*, ; inline
-: contained, ( name -- ) f contained*, ; inline
-
-: make-xml* ( name attrs quot -- xml )
- (tag,) build-xml ; inline
-: make-xml ( name quot -- xml )
- f swap make-xml* ; inline
--- /dev/null
+Daniel Ehrenberg
\ No newline at end of file
--- /dev/null
+USING: help.markup help.syntax present multiline ;
+IN: xml.interpolate
+
+ABOUT: "xml.interpolate"
+
+ARTICLE: "xml.interpolate" "XML literal interpolation"
+"The " { $vocab-link "xml.interpolate" } " vocabulary provides a convenient syntax for generating XML documents and chunks. It defines the following parsing words:"
+{ $subsection POSTPONE: <XML }
+{ $subsection POSTPONE: [XML }
+"For a description of the common syntax of these two, see"
+{ $subsection { "xml.interpolate" "in-depth" } } ;
+
+HELP: <XML
+{ $syntax "<XML <?xml version=\"1.0\"?><document>...</document> XML>" }
+{ $description "This syntax allows the interpolation of XML documents. When evaluated, there is an XML document on the stack. For more information about XML interpolation, see " { $link { "xml.interpolate" "in-depth" } } "." } ;
+
+HELP: [XML
+{ $syntax "[XML foo <x>...</x> bar <y>...</y> baz XML]" }
+{ $description "This syntax allows the interpolation of XML chunks. When evaluated, there is a sequence of XML elements (tags, strings, comments, etc) on the stack. For more information about XML interpolation, see " { $link { "xml.interpolate" "in-depth" } } "." } ;
+
+ARTICLE: { "xml.interpolate" "in-depth" } "XML interpolation syntax"
+"XML interpolation has two forms for each of the words " { $link POSTPONE: <XML } " and " { $link POSTPONE: [XML } ": a fry-like form and a locals form. To splice locals in, use the syntax " { $snippet "<-variable->" } ". To splice something in from the stack, in the style of " { $vocab-link "fry" } ", use the syntax " { $snippet "<->" } ". An XML interpolation form may only use one of these styles."
+$nl
+"These forms can be used where a tag might go, as in " { $snippet "[XML <foo><-></foo> XML]" } " or where an attribute might go, as in " { $snippet "[XML <foo bar=<->/> XML]" } ". When an attribute is spliced in, it is not included if the value is " { $snippet "f" } " and if the value is not a string, the value is put through " { $link present } ". Here is an example of the fry style of XML interpolation:"
+{ $example
+{" USING: splitting sequences xml.writer xml.interpolate ;
+"one two three" " " split
+[ [XML <item><-></item> XML] ] map
+<XML <doc><-></doc> XML> pprint-xml"}
+{" <?xml version="1.0" encoding="UTF-8"?>
+<doc>
+ <item>
+ one
+ </item>
+ <item>
+ two
+ </item>
+ <item>
+ three
+ </item>
+</doc>"} }
+"Here is an example of the locals version:"
+{ $example
+{" USING: locals urls xml.interpolate xml.writer ;
+[let |
+ number [ 3 ]
+ false [ f ]
+ url [ URL" http://factorcode.org/" ]
+ string [ "hello" ]
+ word [ \ drop ] |
+ <XML
+ <x
+ number=<-number->
+ false=<-false->
+ url=<-url->
+ string=<-string->
+ word=<-word-> />
+ XML> pprint-xml ] "}
+{" <?xml version="1.0" encoding="UTF-8"?>
+<x number="3" url="http://factorcode.org/" string="hello" word="drop"/>"} } ;
--- /dev/null
+! Copyright (C) 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: tools.test xml.interpolate multiline kernel assocs
+sequences accessors xml.writer xml.interpolate.private
+locals splitting urls ;
+IN: xml.interpolate.tests
+
+[ "a" "c" { "a" "c" f } ] [
+ "<?xml version='1.0'?><x><-a-><b val=<-c->/><-></x>"
+ string>doc
+ [ second var>> ]
+ [ fourth "val" swap at var>> ]
+ [ extract-variables ] tri
+] unit-test
+
+[ {" <?xml version="1.0" encoding="UTF-8"?>
+<x>
+ one
+ <b val="two"/>
+ y
+ <foo/>
+</x>"} ] [
+ [let* | a [ "one" ] c [ "two" ] x [ "y" ]
+ d [ [XML <-x-> <foo/> XML] ] |
+ <XML
+ <x> <-a-> <b val=<-c->/> <-d-> </x>
+ XML> pprint-xml>string
+ ]
+] unit-test
+
+[ {" <?xml version="1.0" encoding="UTF-8"?>
+<doc>
+ <item>
+ one
+ </item>
+ <item>
+ two
+ </item>
+ <item>
+ three
+ </item>
+</doc>"} ] [
+ "one two three" " " split
+ [ [XML <item><-></item> XML] ] map
+ <XML <doc><-></doc> XML> pprint-xml>string
+] unit-test
+
+[ {" <?xml version="1.0" encoding="UTF-8"?>
+<x number="3" url="http://factorcode.org/" string="hello" word="drop"/>"} ]
+[ 3 f URL" http://factorcode.org/" "hello" \ drop
+ <XML <x number=<-> false=<-> url=<-> string=<-> word=<->/> XML>
+ pprint-xml>string ] unit-test
+
+[ "<x>3</x>" ] [ 3 [XML <x><-></x> XML] xml-chunk>string ] unit-test
+[ "<x></x>" ] [ f [XML <x><-></x> XML] xml-chunk>string ] unit-test
+
+\ parse-def must-infer
+[ "" interpolate-chunk ] must-infer
+[ [XML <foo><-></foo> <bar val=<->/> XML] ] must-infer
--- /dev/null
+! Copyright (C) 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: xml xml.state kernel sequences fry assocs xml.data
+accessors strings make multiline parser namespaces macros
+sequences.deep generalizations words combinators
+math present arrays ;
+IN: xml.interpolate
+
+<PRIVATE
+
+: string>chunk ( string -- chunk )
+ t interpolating? [ string>xml-chunk ] with-variable ;
+
+: string>doc ( string -- xml )
+ t interpolating? [ string>xml ] with-variable ;
+
+DEFER: interpolate-sequence
+
+: interpolate-attrs ( table attrs -- attrs )
+ swap '[
+ dup interpolated?
+ [ var>> _ at dup [ present ] when ] when
+ ] assoc-map [ nip ] assoc-filter ;
+
+: interpolate-tag ( table tag -- tag )
+ [ nip name>> ]
+ [ attrs>> interpolate-attrs ]
+ [ children>> [ interpolate-sequence ] [ drop f ] if* ] 2tri
+ <tag> ;
+
+GENERIC: push-item ( item -- )
+M: string push-item , ;
+M: xml-data push-item , ;
+M: object push-item present , ;
+M: sequence push-item
+ [ dup array? [ % ] [ , ] if ] each ;
+M: number push-item present , ;
+
+GENERIC: interpolate-item ( table item -- )
+M: object interpolate-item nip , ;
+M: tag interpolate-item interpolate-tag , ;
+M: interpolated interpolate-item
+ var>> swap at push-item ;
+
+: interpolate-sequence ( table seq -- seq )
+ [ [ interpolate-item ] with each ] { } make ;
+
+: interpolate-xml-doc ( table xml -- xml )
+ (clone) [ interpolate-tag ] change-body ;
+
+: (each-interpolated) ( item quot: ( interpolated -- ) -- )
+ {
+ { [ over interpolated? ] [ call ] }
+ { [ over tag? ] [
+ [ attrs>> values [ interpolated? ] filter ] dip each
+ ] }
+ { [ over xml? ] [ [ body>> ] dip (each-interpolated) ] }
+ [ 2drop ]
+ } cond ; inline recursive
+
+: each-interpolated ( xml quot -- )
+ '[ _ (each-interpolated) ] deep-each ; inline
+
+: number<-> ( doc -- dup )
+ 0 over [
+ dup var>> [ over >>var [ 1+ ] dip ] unless drop
+ ] each-interpolated drop ;
+
+MACRO: interpolate-xml ( string -- doc )
+ string>doc number<-> '[ _ interpolate-xml-doc ] ;
+
+MACRO: interpolate-chunk ( string -- chunk )
+ string>chunk number<-> '[ _ interpolate-sequence ] ;
+
+: >search-hash ( seq -- hash )
+ [ dup search ] H{ } map>assoc ;
+
+: extract-variables ( xml -- seq )
+ [ [ var>> , ] each-interpolated ] { } make ;
+
+: nenum ( ... n -- assoc )
+ narray <enum> ; inline
+
+: collect ( accum seq -- accum )
+ {
+ { [ dup [ ] all? ] [ >search-hash parsed ] } ! locals
+ { [ dup [ not ] all? ] [ ! fry
+ length parsed \ nenum parsed
+ ] }
+ [ drop "XML interpolation contains both fry and locals" throw ] ! mixed
+ } cond ;
+
+: parse-def ( accum delimiter word -- accum )
+ [
+ parse-multiline-string but-last
+ [ string>chunk extract-variables collect ] keep
+ parsed
+ ] dip parsed ;
+
+PRIVATE>
+
+: <XML
+ "XML>" \ interpolate-xml parse-def ; parsing
+
+: [XML
+ "XML]" \ interpolate-chunk parse-def ; parsing
--- /dev/null
+Syntax for XML interpolation
--- /dev/null
+syntax
+enterprise
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel namespaces accessors xml.tokenize xml.data assocs
+xml.errors xml.char-classes combinators.short-circuit splitting
+fry xml.state sequences combinators ascii ;
+IN: xml.name
+
+! XML namespace processing: ns = namespace
+
+! A stack of hashtables
+SYMBOL: ns-stack
+
+: attrs>ns ( attrs-alist -- hash )
+ ! this should check to make sure URIs are valid
+ [
+ [
+ swap dup space>> "xmlns" =
+ [ main>> set ]
+ [
+ T{ name f "" "xmlns" f } names-match?
+ [ "" set ] [ drop ] if
+ ] if
+ ] assoc-each
+ ] { } make-assoc f like ;
+
+: add-ns ( name -- )
+ dup space>> dup ns-stack get assoc-stack
+ [ nip ] [ nonexist-ns ] if* >>url drop ;
+
+: push-ns ( hash -- )
+ ns-stack get push ;
+
+: pop-ns ( -- )
+ ns-stack get pop* ;
+
+: init-ns-stack ( -- )
+ V{ H{
+ { "xml" "http://www.w3.org/XML/1998/namespace" }
+ { "xmlns" "http://www.w3.org/2000/xmlns" }
+ { "" "" }
+ } } clone
+ ns-stack set ;
+
+: tag-ns ( name attrs-alist -- name attrs )
+ dup attrs>ns push-ns
+ [ dup add-ns ] dip dup [ drop add-ns ] assoc-each <attrs> ;
+
+: valid-name? ( str -- ? )
+ [ f ] [
+ version=1.0? swap {
+ [ first name-start? ]
+ [ rest-slice [ name-char? ] with all? ]
+ } 2&&
+ ] if-empty ;
+
+: prefixed-name ( str -- name/f )
+ ":" split dup length 2 = [
+ [ [ valid-name? ] all? ]
+ [ first2 f <name> ] bi and
+ ] [ drop f ] if ;
+
+: interpret-name ( str -- name )
+ dup prefixed-name [ ] [
+ dup valid-name?
+ [ <simple-name> ] [ bad-name ] if
+ ] ?if ;
+
+: take-name ( -- string )
+ version=1.0? '[ _ get-char name-char? not ] take-until ;
+
+: parse-name ( -- name )
+ take-name interpret-name ;
+
+: parse-name-starting ( string -- name )
+ take-name append interpret-name ;
+
+: take-system-id ( -- system-id )
+ parse-quote <system-id> ;
+
+: take-public-id ( -- public-id )
+ parse-quote parse-quote <public-id> ;
+
+: (take-external-id) ( token -- external-id )
+ pass-blank {
+ { "SYSTEM" [ take-system-id ] }
+ { "PUBLIC" [ take-public-id ] }
+ [ bad-external-id ]
+ } case ;
+
+: take-word ( -- string )
+ [ get-char blank? ] take-until ;
+
+: take-external-id ( -- external-id )
+ take-word (take-external-id) ;
--- /dev/null
+Implements parsing XML names
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors kernel namespaces io ;
+IN: xml.state
+
+TUPLE: spot char line column next check ;
+
+C: <spot> spot
+
+: get-char ( -- char ) spot get char>> ;
+: set-char ( char -- ) spot get swap >>char drop ;
+: get-line ( -- line ) spot get line>> ;
+: set-line ( line -- ) spot get swap >>line drop ;
+: get-column ( -- column ) spot get column>> ;
+: set-column ( column -- ) spot get swap >>column drop ;
+: get-next ( -- char ) spot get next>> ;
+: set-next ( char -- ) spot get swap >>next drop ;
+: get-check ( -- ? ) spot get check>> ;
+: check ( -- ) spot get t >>check drop ;
+
+SYMBOL: xml-stack
+
+SYMBOL: prolog-data
+
+SYMBOL: depth
+
+SYMBOL: interpolating?
+
+SYMBOL: in-dtd?
+
+SYMBOL: pe-table
+
+SYMBOL: extra-entities
--- /dev/null
+Primitive device for storing the state of the XML parser
+++ /dev/null
-! Copyright (C) 2005, 2006 Daniel Ehrenberg
-! See http://factorcode.org/license.txt for BSD license.
-IN: xml.tests
-USING: xml io kernel math sequences strings xml.utilities tools.test math.parser ;
-
-PROCESS: calculate ( tag -- n )
-
-: calc-2children ( tag -- n n )
- children-tags first2 [ calculate ] dip calculate ;
-
-TAG: number calculate
- children>string string>number ;
-TAG: add calculate
- calc-2children + ;
-TAG: minus calculate
- calc-2children - ;
-TAG: times calculate
- calc-2children * ;
-TAG: divide calculate
- calc-2children / ;
-TAG: neg calculate
- children-tags first calculate neg ;
-
-: calc-arith ( string -- n )
- string>xml first-child-tag calculate ;
-
-[ 32 ] [
- "<math><times><add><number>1</number><number>3</number></add><neg><number>-8</number></neg></times></math>"
- calc-arith
-] unit-test
<directoryTitle xsi:type="xsd:string"></directoryTitle>
<hostName xsi:type="xsd:string"></hostName>
<relatedInformationPresent xsi:type="xsd:boolean">true</relatedInformationPresent>
-<snippet xsi:type="xsd:string">The O\e$-1òùReilly <b>Factor</b> with Bill OòùReilly on FOXNews.com. Bill OòùReilly hosts The <br> OòùReilly <b>Factor</b>, the most-watched program on cable news.</snippet>
+<snippet xsi:type="xsd:string">The O$-1òùReilly <b>Factor</b> with Bill OòùReilly on FOXNews.com. Bill OòùReilly hosts The <br> OòùReilly <b>Factor</b>, the most-watched program on cable news.</snippet>
<summary xsi:type="xsd:string"></summary>
-<title xsi:type="xsd:string">Bill O\e$-1òùReilly | The OòùReilly <b>Factor</b> - FOXNews.com</title>
+<title xsi:type="xsd:string">Bill O$-1òùReilly | The OòùReilly <b>Factor</b> - FOXNews.com</title>
</item>
<item xsi:type="ns1:ResultElement">
<URL xsi:type="xsd:string">http://www.factor.ca/</URL>
--- /dev/null
+USING: tools.test xml.tokenize xml.state io.streams.string kernel io strings ascii ;
+IN: xml.test.state
+
+: string-parse ( str quot -- )
+ [ <string-reader> ] dip with-state ;
+
+: take-rest ( -- string )
+ [ f ] take-until ;
+
+: take-char ( char -- string )
+ 1string take-to ;
+
+[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
+[ 2 4 ] [ "12\n123" [ take-rest drop get-line get-column ] string-parse ] unit-test
+[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test
+[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test
+[ "foo " " bar" ] [ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test
+[ "baz" ] [ " \n\t baz" [ pass-blank take-rest ] string-parse ] unit-test
-USING: kernel xml sequences assocs tools.test io arrays namespaces
-accessors xml.data xml.utilities xml.writer generic sequences.deep ;
+USING: kernel xml sequences assocs tools.test io arrays namespaces fry
+accessors xml.data xml.utilities xml.writer generic sequences.deep multiline ;
IN: xml.tests
: sub-tag
! Example
-: sample-doc ( -- string )
- {
- "<html xmlns:f='http://littledan.onigirihouse.com/namespaces/replace'>"
- "<body>"
- "<span f:sub='foo'/>"
- "<div f:sub='bar'/>"
- "<p f:sub='baz'>paragraph</p>"
- "</body></html>"
- } concat ;
+STRING: sample-doc
+<html xmlns:f='http://littledan.onigirihouse.com/namespaces/replace'>
+<body>
+<span f:sub='foo'/>
+<div f:sub='bar'/>
+<p f:sub='baz'>paragraph</p>
+</body></html>
+;
+
+STRING: expected-result
+<?xml version="1.0" encoding="UTF-8"?>
+<html xmlns:f="http://littledan.onigirihouse.com/namespaces/replace">
+ <body>
+ <span f:sub="foo">
+ foo
+ </span>
+ <div f:sub="bar">
+ blah
+ <a/>
+ </div>
+ <p f:sub="baz"/>
+ </body>
+</html>
+;
: test-refs ( -- string )
[
H{
{ "foo" { "foo" } }
- { "bar" { "blah" T{ tag f T{ name f "" "a" "" } f f } } }
+ { "bar" { "blah" T{ tag f T{ name f "" "a" "" } T{ attrs } f } } }
{ "baz" f }
} ref-table set
- sample-doc string>xml dup template xml>string
+ sample-doc string>xml dup template pprint-xml>string
] with-scope ;
-[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><html xmlns:f=\"http://littledan.onigirihouse.com/namespaces/replace\"><body><span f:sub=\"foo\">foo</span><div f:sub=\"bar\">blah<a/></div><p f:sub=\"baz\"/></body></html>" ] [ test-refs ] unit-test
+expected-result '[ _ ] [ test-refs ] unit-test
-! Copyright (C) 2005, 2006 Daniel Ehrenberg
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
IN: xml.tests
USING: kernel xml tools.test io namespaces make sequences
xml.errors xml.entities.html parser strings xml.data io.files
-xml.writer xml.utilities state-parser continuations assocs
+xml.utilities continuations assocs
sequences.deep accessors io.streams.string ;
! This is insufficient
\ read-xml must-infer
+[ [ drop ] each-element ] must-infer
+\ string>xml must-infer
SYMBOL: xml-file
[ ] [ "resource:basis/xml/tests/test.xml"
xml-file get T{ name f "" "this" "http://d.de" } swap at
] unit-test
[ t ] [ xml-file get children>> second contained-tag? ] unit-test
-[ "<a></b>" string>xml ] [ xml-parse-error? ] must-fail-with
+[ "<a></b>" string>xml ] [ xml-error? ] must-fail-with
[ T{ comment f "This is where the fun begins!" } ] [
xml-file get before>> [ comment? ] find nip
] unit-test
] unit-test
[ V{ "fa&g" } ] [ xml-file get "x" get-id children>> ] unit-test
[ "that" ] [ xml-file get "this" swap at ] unit-test
-[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
- [ "<a b='c'/>" string>xml xml>string ] unit-test
[ "abcd" ] [
"<main>a<sub>bc</sub>d<nothing/></main>" string>xml
[ [ dup string? [ % ] [ drop ] if ] deep-each ] "" make
at swap "z" [ tuck ] dip swap set-at
T{ name f "blah" "z" f } swap at ] unit-test
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
-[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><foo>bar baz</foo>" ]
-[ "<foo>bar</foo>" string>xml [ " baz" append ] map xml>string ] unit-test
-[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<foo>\n bar\n</foo>" ]
-[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test
[ "<!-- B+, B, or B--->" string>xml ] must-fail
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
-[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test
-[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk first ] unit-test
-[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk first ] unit-test
-[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk first ] unit-test
+[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd directives>> first ] unit-test
+[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd directives>> first ] unit-test
+[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd directives>> first ] unit-test
+[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd directives>> first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
-[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test
-[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
-[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
-[ 958 ] [ [ "ξ" string>xml-chunk ] with-html-entities first first ] unit-test
\ No newline at end of file
+[ 958 ] [ [ "ξ" string>xml-chunk ] with-html-entities first first ] unit-test
+[ "x" "<" ] [ "<x value='<'/>" string>xml [ name>> main>> ] [ "value" swap at ] bi ] unit-test
+[ "foo" ] [ "<!DOCTYPE foo [<!ENTITY bar 'foo'>]><x>&bar;</x>" string>xml children>string ] unit-test
--- /dev/null
+USING: accessors assocs combinators continuations fry generalizations
+io.pathnames kernel macros sequences stack-checker tools.test xml
+xml.utilities xml.writer arrays ;
+IN: xml.tests.suite
+
+TUPLE: xml-test id uri sections description type ;
+
+: >xml-test ( tag -- test )
+ xml-test new swap {
+ [ "TYPE" swap at >>type ]
+ [ "ID" swap at >>id ]
+ [ "URI" swap at >>uri ]
+ [ "SECTIONS" swap at >>sections ]
+ [ children>> xml-chunk>string >>description ]
+ } cleave ;
+
+: parse-tests ( xml -- tests )
+ "TEST" tags-named [ >xml-test ] map ;
+
+: base "resource:basis/xml/tests/xmltest/" ;
+
+MACRO: drop-output ( quot -- newquot )
+ dup infer out>> '[ @ _ ndrop ] ;
+
+MACRO: drop-input ( quot -- newquot )
+ infer in>> '[ _ ndrop ] ;
+
+: fails? ( quot -- ? )
+ [ '[ _ drop-output f ] ]
+ [ '[ drop _ drop-input t ] ] bi recover ; inline
+
+: well-formed? ( uri -- answer )
+ [ file>xml ] fails? "not-wf" "valid" ? ;
+
+: test-quots ( test -- result quot )
+ [ type>> '[ _ ] ]
+ [ '[ _ uri>> base swap append-path well-formed? ] ] bi ;
+
+: xml-tests ( -- tests )
+ base "xmltest.xml" append-path file>xml
+ parse-tests [ test-quots 2array ] map ;
+
+: run-xml-tests ( -- )
+ xml-tests [ unit-test ] assoc-each ;
+
+: works? ( result quot -- ? )
+ [ first ] [ call ] bi* = ;
+
+: partition-xml-tests ( -- successes failures )
+ xml-tests [ first2 works? ] partition ;
+
+: failing-valids ( -- tests )
+ partition-xml-tests nip [ second first ] map [ type>> "valid" = ] filter ;
--- /dev/null
+<HTML>\r
+<TITLE>Canonical XML</TITLE>\r
+<BODY>\r
+<H1>Canonical XML</H1>\r
+<P>\r
+This document defines a subset of XML called canonical XML.\r
+The intended use of canonical XML is in testing XML processors,\r
+as a representation of the result of parsing an XML document.\r
+<P>\r
+Every well-formed XML document has a unique structurally equivalent\r
+canonical XML document. Two structurally equivalent XML\r
+documents have a byte-for-byte identical canonical XML document.\r
+Canonicalizing an XML document requires only information that an XML\r
+processor is required to make available to an application.\r
+<P>\r
+A canonical XML document conforms to the following grammar:\r
+<PRE>\r
+CanonXML ::= Pi* element Pi*\r
+element ::= Stag (Datachar | Pi | element)* Etag\r
+Stag ::= '<' Name Atts '>'\r
+Etag ::= '</' Name '>'\r
+Pi ::= '<?' Name ' ' (((Char - S) Char*)? - (Char* '?>' Char*)) '?>'\r
+Atts ::= (' ' Name '=' '"' Datachar* '"')*\r
+Datachar ::= '&amp;' | '&lt;' | '&gt;' | '&quot;'\r
+ | '&#9;'| '&#10;'| '&#13;'\r
+ | (Char - ('&' | '<' | '>' | '"' | #x9 | #xA | #xD))\r
+Name ::= (see XML spec)\r
+Char ::= (see XML spec)\r
+S ::= (see XML spec)\r
+</PRE>\r
+<P>\r
+Attributes are in lexicographical order (in Unicode bit order).\r
+<P>\r
+A canonical XML document is encoded in UTF-8.\r
+<P>\r
+Ignorable white space is considered significant and is treated equivalently\r
+to data.\r
+<P>\r
+<ADDRESS>\r
+<A HREF="mailto:jjc@jclark.com">James Clark</A>\r
+</ADDRESS>\r
+\r
+</BODY>\r
+</HTML>
\ No newline at end of file
--- /dev/null
+<!ENTITY % e "(#PCDATA">\r
+<!ELEMENT doc %e;)>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "002.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ENTITY % e ">">\r
+<!ELEMENT doc (#PCDATA) %e;\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "005.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ENTITY % e "(#PCDATA)>">\r
+<!ELEMENT doc %e;\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "006.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ENTITY % e "INCLUDE[">\r
+<!ELEMENT doc (#PCDATA)>\r
+<![ %e; <!ATTLIST doc a1 CDATA "v1"> ]]>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "022.ent">\r
+<doc></doc>\r
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+&e;
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e SYSTEM "001.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<?xml version="1.0" standalone="yes"?>\r
+data\r
+\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "002.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?><?xml version="1.0" encoding="UTF-8"?>\r
+data\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "003.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<![ INCLUDE [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "001.ent">\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e "<?xml version='1.0' encoding='UTF-8'?>">\r
+%e;\r
+]>\r
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<![ IGNORE [\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "003.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<![ INCLUDE [\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "004.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+%e;\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "005.ent">\r
+<doc></doc>\r
--- /dev/null
+<![INCLUDE\r
+<!ELEMENT doc (#PCDATA)>\r
+]]>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "006.ent">\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "007.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc ANY>\r
+<!ENTITY e "100%">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "008.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc EMPTY>\r
+<!ENTITY % e "<!--">\r
+%e; -->\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "009.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ENTITY % e "<!ELEMENT ">\r
+%e; doc (#PCDATA)>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "010.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ENTITY % e1 "<!ELEMENT ">\r
+<!ENTITY % e2 ">">\r
+%e1; doc (#PCDATA) %e2;\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "011.ent">\r
+<doc></doc>\r
--- /dev/null
+<doc>\r
+<doc\r
+?\r
+<a</a>\r
+</doc>\r
--- /dev/null
+<doc>\r
+<.doc></.doc>\r
+</doc>\r
+\r
--- /dev/null
+<doc><? ?></doc>\r
--- /dev/null
+<doc><?target some data></doc>\r
--- /dev/null
+<doc><?target some data?</doc>\r
--- /dev/null
+<doc><!-- a comment -- another --></doc>\r
--- /dev/null
+<doc>& no refc</doc>\r
--- /dev/null
+<doc>&.entity;</doc>\r
--- /dev/null
+<doc>&#RE;</doc>\r
--- /dev/null
+<doc>A & B</doc>\r
--- /dev/null
+<doc a1></doc>\r
--- /dev/null
+<doc a1=v1></doc>\r
--- /dev/null
+<doc a1="v1'></doc>\r
--- /dev/null
+<doc a1="<foo>"></doc>\r
--- /dev/null
+<doc a1=></doc>\r
--- /dev/null
+<doc a1="v1" "v2"></doc>\r
--- /dev/null
+<doc><![CDATA[</doc>\r
--- /dev/null
+<doc><![CDATA [ stuff]]></doc>\r
--- /dev/null
+<doc></>\r
--- /dev/null
+<doc a1="A & B"></doc>\r
--- /dev/null
+<doc a1="a&b"></doc>\r
--- /dev/null
+<doc a1="{:"></doc>\r
--- /dev/null
+<doc 12="34"></doc>\r
--- /dev/null
+<doc>\r
+<123></123>\r
+</doc>\r
--- /dev/null
+<doc>]]></doc>\r
--- /dev/null
+<doc>]]]></doc>\r
--- /dev/null
+<doc>\r
+<!-- abc\r
+</doc>\r
--- /dev/null
+<doc>\r
+<?a pi that is not closed\r
+</doc>\r
+\r
--- /dev/null
+<doc>abc]]]>def</doc>\r
--- /dev/null
+<doc>A form feed (\f) is not legal in data</doc>\r
--- /dev/null
+<doc><?pi a form feed (\f) is not allowed in a pi?></doc>\r
--- /dev/null
+<doc><!-- a form feed (\f) is not allowed in a comment --></doc>\r
--- /dev/null
+<doc>abc\edef</doc>\r
--- /dev/null
+<doc\f>A form-feed is not white space or a name character</doc\f>\r
--- /dev/null
+<doc>1 < 2 but not in XML</doc>\r
--- /dev/null
+<doc></doc>\r
+Illegal data\r
--- /dev/null
+<doc></doc>\r
+ \r
--- /dev/null
+<doc x="foo" y="bar" x="baz"></doc>\r
--- /dev/null
+<doc><a></aa></doc>\r
--- /dev/null
+<doc></doc>\r
+<doc></doc>\r
--- /dev/null
+<doc/>\r
+<doc></doc>\r
--- /dev/null
+<doc/></doc/>\r
--- /dev/null
+<doc/>\r
+Illegal data\r
--- /dev/null
+<doc/><doc/>\r
--- /dev/null
+<doc>\r
+<a/\r
+</doc>\r
+\r
--- /dev/null
+<doc>\r
+<a/</a>\r
+</doc>\r
--- /dev/null
+<doc>\r
+<a / >\r
+</doc>\r
--- /dev/null
+<doc>\r
+</doc>\r
+<![CDATA[]]>\r
--- /dev/null
+<doc>\r
+<a><![CDATA[xyz]]]></a>\r
+<![CDATA[]]></a>\r
+</doc>\r
--- /dev/null
+<!-- a comment -->\r
+<![CDATA[]]>\r
+<doc></doc>\r
--- /dev/null
+<!-- a comment -->\r
+ \r
+<doc></doc>\r
--- /dev/null
+<doc></DOC>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY foo PUBLIC "some public id">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc -- a comment -- []>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "whatever" -- a comment -->\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 (foo,bar) #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 NMTOKEN v1>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 NAME #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e PUBLIC "whatever""e.ent">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY foo"some text">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<![INCLUDE[ ]]>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST e a1 CDATA"foo">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1(foo|bar) #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 (foo|bar)#IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 (foo)"foo">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 NOTATION(foo) #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!NOTATION eps SYSTEM "eps.exe">\r
+<!-- missing space before NDATA -->\r
+<!ENTITY foo SYSTEM "foo.eps"NDATA eps>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!-- a comment ending with three dashes --->\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e1 "&e2;">\r
+<!ENTITY e2 "&e3;">\r
+<!ENTITY e3 "&e1;">\r
+]>\r
+<doc>&e1;</doc>\r
--- /dev/null
+<doc>&foo;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "whatever">\r
+]>\r
+<doc>&f;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "</foo><foo>">\r
+]>\r
+<doc>\r
+<foo>&e;</foo>\r
+</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e1 "&e2;">\r
+<!ENTITY e2 "&e3;">\r
+<!ENTITY e3 "&e1;">\r
+]>\r
+<doc a="&e1;"></doc>\r
+\r
--- /dev/null
+<doc a="&foo;"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY foo "&bar;">\r
+]>\r
+<doc a="&foo;"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA "&foo;">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e1 "&e2;">\r
+<!ENTITY e2 "&e3;">\r
+<!ENTITY e3 "&e1;">\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA "&e1;">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e1 "&e2;">\r
+<!ENTITY e2 "&e3;">\r
+<!ENTITY e3 "&e1;">\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #FIXED "&e1;">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e SYSTEM "nul">\r
+]>\r
+<doc a="&e;"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e SYSTEM "nul">\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA "&e;">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e SYSTEM "nul" NDATA n>\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e SYSTEM "nul" NDATA n>\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA "&e;">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc PUBLIC "[" "null.ent">\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY foo PUBLIC "[" "null.xml">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!NOTATION foo PUBLIC "[" "null.ent">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #IMPLIED>\r
+<!ENTITY e '"'>\r
+]>\r
+<doc a="&e;></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY % foo SYSTEM "foo.xml" NDATA bar>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<foo a='<'></foo>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!NOTATION n SYSTEM "n">\r
+<!ENTITY % foo SYSTEM "foo.xml" NDATA n>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<foo a='&'></foo>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<doc>X</doc>\r
--- /dev/null
+<?xml VERSION="1.0"?>\r
+<doc></doc>\r
--- /dev/null
+<?xml encoding="UTF-8" version="1.0"?>\r
+<doc></doc>\r
--- /dev/null
+<?xml version="1.0"encoding="UTF-8" ?>\r
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0' encoding="UTF-8" ?>\r
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" version="1.0"?>\r
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" valid="no" ?>\r
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" standalone="YES" ?>\r
+<doc></doc>\r
--- /dev/null
+<?xml version="1.0" encoding=" UTF-8"?>\r
+<doc></doc>\r
--- /dev/null
+<?xml version="1.0 " ?>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<foo>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<foo>">\r
+]>\r
+<doc>&e;</foo></doc>\r
--- /dev/null
+<?pi stuff?>\r
+<![CDATA[]]>\r
+<doc>\r
+</doc>\r
--- /dev/null
+<?pi data?>\r
+ <doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<![CDATA[]]>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<doc>\r
+<![CDATA [ ]]>\r
+</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<doc></doc>">\r
+]>\r
+&e;\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "">\r
+]>\r
+<doc></doc>\r
+&e;\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "foo='bar'">\r
+]>\r
+<doc &e;></doc>\r
--- /dev/null
+<doc>\r
+<![cdata[data]]>\r
+</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY % foo "&">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY foo "&">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "&">\r
+]>\r
+<doc a="&e;"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "&#9">\r
+]>\r
+<doc>&e;7;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "&">\r
+]>\r
+<doc>&e;#97;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "#">\r
+]>\r
+<doc>&&e;97;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "&">\r
+]>\r
+<doc>\r
+&e;#38;\r
+</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "&">\r
+]>\r
+<doc>\r
+&e;\r
+</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY #DEFAULT "default">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (a, (b) | c)?>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc ((doc?)))>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (doc|#PCDATA)*>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc ((#PCDATA))>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)+>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)?>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc CDATA>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc - - (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (doc?) +(foo)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (doc?) -(foo)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (a, (b, c), (d, (e, f) | g))?>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (a *)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (a) *>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (a & b)?>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc O O (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc(#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (doc*?)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc ()>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<゚></゚>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<X๜></X๜>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>�</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>�</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>�</doc>\r
--- /dev/null
+\r
+<?xml version="1.0"?>\r
+<doc></doc>\r
--- /dev/null
+<!-- -->\r
+<?xml version="1.0"?>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<?xml version="1.0"?>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<doc>\r
+<?xml version="1.0"?>\r
+</doc>\r
--- /dev/null
+<doc>\r
+</doc>\r
+<?xml version="1.0"?>\r
--- /dev/null
+<?xml encoding="UTF-8"?>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e "<?xml encoding='UTF-8'?>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<?XML version="1.0"?>\r
+<doc></doc>\r
--- /dev/null
+<?xmL version="1.0"?>\r
+<doc></doc>\r
--- /dev/null
+<doc>\r
+<?xMl version="1.0"?>\r
+</doc>\r
--- /dev/null
+<doc>\r
+<?xmL?>\r
+</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!NOTATION gif PUBLIC "image/gif" "">\r
+<!ATTLIST #NOTATION gif a1 CDATA #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e "<![CDATA[Tim & Michael]]>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e "">\r
+<!ENTITY foo "%e;">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY % e "#PCDATA">\r
+<!ELEMENT doc (%e;)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e1 "">\r
+<!ENTITY % e2 "%e1;">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e "">\r
+]>\r
+%e;\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e "">\r
+] %e; >\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY% e "">\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<doc>ï¿¿</doc>\r
--- /dev/null
+<doc>￾</doc>\r
--- /dev/null
+<doc>í €</doc>\r
--- /dev/null
+<doc>í°€</doc>\r
--- /dev/null
+<doc>÷€€€</doc>\r
--- /dev/null
+<!-- ï¿¿ -->\r
+<doc></doc>\r
--- /dev/null
+<?pi ï¿¿?>\r
+<doc></doc>\r
--- /dev/null
+<doc a="ï¿¿"></doc>\r
--- /dev/null
+<doc><![CDATA[ï¿¿]]></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e "ï¿¿">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>Aï¿¿</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #IMPLIED>\r
+]>\r
+<doc a=""></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA "&e;">\r
+<!ENTITY e "v">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<![CDATA[">\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>&e;]]></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<!--">\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>&e;--></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA | foo*)* >\r
+<!ELEMENT foo EMPTY>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA | (foo))* >\r
+<!ELEMENT foo EMPTY>\r
+]>\r
+<doc></doc>\r
+\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
--- /dev/null
+<?xml version="1.0" standalone="yes"?>\r
+<!DOCTYPE doc SYSTEM "185.ent">\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE a [\r
+<!ELEMENT a EMPTY>\r
+<!ATTLIST a b CDATA #IMPLIED d CDATA #IMPLIED>\r
+]>\r
+<a b="c"d="e"/>\r
--- /dev/null
+<HTML>\r
+<TITLE>XML Test Cases</TITLE>\r
+<BODY>\r
+<H1>XML Test Cases version 1998-11-18</H1>\r
+<P>\r
+Copyright (C) 1998 James Clark. All rights reserved. Permission is\r
+granted to copy and modify this collection in any way for internal use\r
+within a company or organization. Permission is granted to\r
+redistribute the file <code>xmltest.zip</code> containing this\r
+collection to third parties provided that no modifications of any kind\r
+are made to this file. Note that permission to distribute the\r
+collection in any other form is not granted.\r
+<P>\r
+The collection is structured into three directories:\r
+<DL>\r
+<DT><CODE>not-wf</CODE>\r
+<DD>this contains cases that are not well-formed XML documents\r
+<DT><CODE>valid</CODE>\r
+<DD>this contains cases that are valid XML documents\r
+<DT><CODE>invalid</CODE>\r
+<DD>this contains cases that are well-formed XML documents\r
+but are not valid XML documents\r
+</DL>\r
+<P>\r
+The <CODE>not-wf</CODE> and <CODE>valid</CODE> directories each have\r
+three subdirectories:\r
+<DL>\r
+<DT>\r
+<CODE>sa</CODE>\r
+<DD>\r
+this contains cases that are standalone (as defined in XML) and do not\r
+have references to external general entities\r
+<DT>\r
+<CODE>ext-sa</CODE>\r
+<DD>\r
+this contains case that are standalone and have references to external\r
+general entities\r
+<DT>\r
+<CODE>not-sa</CODE>\r
+<DD>\r
+this contains cases that are not standalone\r
+</DL>\r
+<P>\r
+In each directory, files with a <CODE>.xml</CODE> extension are the\r
+XML document test cases, and files with a <CODE>.ent</CODE> extension\r
+are external entities referenced by the test cases.\r
+<P>\r
+Within the <CODE>valid</CODE> directory, each of these three\r
+subdirectories has an <CODE>out</CODE> subdirectory which contains an\r
+equivalent <A HREF="canonxml.html">canonical XML</A> document for each\r
+of the cases.\r
+<P>\r
+<P>\r
+Bug reports and contributions of new test cases are welcome.\r
+<P>\r
+<ADDRESS>\r
+<A HREF="mailto:jjc@jclark.com">James Clark</A>\r
+</ADDRESS>\r
+</BODY>\r
+</HTML>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "001.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+Data
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "002.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "003.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+Data\r
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "004.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<e/><e/><e/>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (e*)>\r
+<!ELEMENT e EMPTY>\r
+<!ENTITY e SYSTEM "005.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+Data\r
+<e/>\r
+More data\r
+<e/>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA|e)*>\r
+<!ELEMENT e EMPTY>\r
+<!ENTITY e SYSTEM "006.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "007.ent">\r
+]>\r
+<doc>X&e;Z</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "008.ent">\r
+]>\r
+<doc>X&e;Z</doc>\r
--- /dev/null
+\r
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "009.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "010.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e PUBLIC "a not very interesting file" "011.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+&e4;
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e1 "&e2;">\r
+<!ENTITY e2 "&e3;">\r
+<!ENTITY e3 SYSTEM "012.ent">\r
+<!ENTITY e4 "&e5;">\r
+<!ENTITY e5 "(e5)">\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>&e1;</doc>\r
--- /dev/null
+<e/>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (e)>\r
+<!ELEMENT e (#PCDATA)>\r
+<!ATTLIST e\r
+ a1 CDATA "a1 default"\r
+ a2 NMTOKENS "a2 default"\r
+>\r
+<!ENTITY x SYSTEM "013.ent">\r
+]>\r
+<doc>&x;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e SYSTEM "014.ent">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<doc>Data </doc>
\ No newline at end of file
--- /dev/null
+<doc>Data</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc>Data </doc>
\ No newline at end of file
--- /dev/null
+<doc><e></e><e></e><e></e></doc>
\ No newline at end of file
--- /dev/null
+<doc>Data <e></e> More data <e></e> </doc>
\ No newline at end of file
--- /dev/null
+<doc>XYZ</doc>
\ No newline at end of file
--- /dev/null
+<doc>XYZ</doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc>xyzzy </doc>
\ No newline at end of file
--- /dev/null
+<doc>(e5)</doc>
\ No newline at end of file
--- /dev/null
+<doc><e a1="a1 default" a2="a2 default"></e></doc>
\ No newline at end of file
--- /dev/null
+<doc>data</doc>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc SYSTEM "001.ent" [\r
+<!ELEMENT doc EMPTY>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+\r
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc SYSTEM "002.ent" [\r
+<!ELEMENT doc EMPTY>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc EMPTY>\r
+<!ENTITY % e SYSTEM "003-2.ent">\r
+<!ATTLIST doc a1 CDATA %e; "v1">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "003-1.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc EMPTY>\r
+<!ENTITY % e1 SYSTEM "004-2.ent">\r
+<!ENTITY % e2 "%e1;">\r
+%e1;\r
--- /dev/null
+<!ATTLIST doc a1 CDATA "value">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "004-1.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc EMPTY>\r
+<!ENTITY % e SYSTEM "005-2.ent">\r
+%e;\r
--- /dev/null
+<!ATTLIST doc a1 CDATA "v1">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "005-1.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc EMPTY>\r
+<!ATTLIST doc a1 CDATA "w1" a2 CDATA "w2">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "006.ent" [\r
+<!ATTLIST doc a1 CDATA "v1">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA "v1">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "007.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA "v1">\r
--- /dev/null
+<!DOCTYPE doc PUBLIC "whatever" "008.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA "v1">\r
--- /dev/null
+<!DOCTYPE doc PUBLIC "whatever" "009.ent" [\r
+<!ATTLIST doc a2 CDATA "v2">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA "v2">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "010.ent" [\r
+<!ATTLIST doc a1 CDATA "v1">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA "v1">\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY % e SYSTEM "011.ent">\r
+%e;\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA "v1">\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY % e SYSTEM "012.ent">\r
+%e;\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<![ INCLUDE [\r
+<!ATTLIST doc a1 CDATA "v1">\r
+]]>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "013.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<![ %e; [\r
+<!ATTLIST doc a1 CDATA "v1">\r
+]]>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "014.ent" [\r
+<!ENTITY % e "INCLUDE">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<![ %e; [\r
+<!ATTLIST doc a1 CDATA "v1">\r
+]]>\r
+<!ATTLIST doc a2 CDATA "v2">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "015.ent" [\r
+<!ENTITY % e "IGNORE">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<![%e;[\r
+<!ATTLIST doc a1 CDATA "v1">\r
+]]>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "016.ent" [\r
+<!ENTITY % e "INCLUDE">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e "<!ATTLIST doc a1 CDATA 'v1'>">\r
+%e;\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "017.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e "'v1'">\r
+<!ATTLIST doc a1 CDATA %e;>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "018.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e "'v1'">\r
+<!ATTLIST doc a1 CDATA%e;>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "019.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ENTITY % e "doc">\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST%e;a1 CDATA "v1">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "020.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ENTITY % e "doc a1 CDATA">\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST %e; "v1">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "021.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e1 "do">\r
+<!ENTITY % e2 "c">\r
+<!ENTITY % e3 "%e1;%e2;">\r
+<!ATTLIST %e3; a1 CDATA "v1">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "023.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e1 "'v1'">\r
+<!ENTITY % e2 'a1 CDATA %e1;'>\r
+<!ATTLIST doc %e2;>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "024.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc EMPTY>\r
+<!ENTITY % e "x">\r
+<!ENTITY % e "y">\r
+<!ENTITY % v "'%e;'">\r
+<!ATTLIST doc a1 CDATA %v;>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "025.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ATTLIST doc a1 CDATA "w1">\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc ANY>\r
+<!ENTITY % e SYSTEM "026.ent">\r
+%e;\r
+<!ATTLIST doc a1 CDATA "x1" a2 CDATA "x2">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!ENTITY % e "">\r
+<!ELEMENT doc (#PCDATA %e;)>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "027.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<![INCLUDE[<!ATTLIST doc a1 CDATA "v1">]]>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "028.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<![IGNORE[<!ATTLIST doc a1 CDATA "v1">]]>\r
+<!ATTLIST doc a1 CDATA "v2">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "029.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<![IGNORE[]]>\r
+<![INCLUDE[]]>\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "030.ent">\r
+<doc></doc>\r
--- /dev/null
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e SYSTEM "031-2.ent">\r
+<!ENTITY e "<![CDATA[%e;]]>">\r
--- /dev/null
+<!ATTLIST doc a1 CDATA "v1">\r
--- /dev/null
+<!DOCTYPE doc SYSTEM "031-1.ent">\r
+<doc>&e;</doc>\r
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="value"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1" a2="w2"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a2="v2"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="x"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="w1" a2="x2"></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v2"></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc><!ATTLIST doc a1 CDATA "v1"> </doc>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc ></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc >\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA #IMPLIED>\r
+]>\r
+<doc a1="v1"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA #IMPLIED>\r
+]>\r
+<doc a1 = "v1"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA #IMPLIED>\r
+]>\r
+<doc a1='v1'></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc> </doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>&<>"'</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc> </doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA #IMPLIED>\r
+]>\r
+<doc a1="v1" ></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA #IMPLIED a2 CDATA #IMPLIED>\r
+]>\r
+<doc a1="v1" a2="v2"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc : CDATA #IMPLIED>\r
+]>\r
+<doc :="v1"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc _.-0123456789 CDATA #IMPLIED>\r
+]>\r
+<doc _.-0123456789="v1"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc abcdefghijklmnopqrstuvwxyz CDATA #IMPLIED>\r
+]>\r
+<doc abcdefghijklmnopqrstuvwxyz="v1"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc ABCDEFGHIJKLMNOPQRSTUVWXYZ CDATA #IMPLIED>\r
+]>\r
+<doc ABCDEFGHIJKLMNOPQRSTUVWXYZ="v1"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc><?pi?></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc><?pi some data ? > <??></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc><![CDATA[<foo>]]></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc><![CDATA[<&]]></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc><![CDATA[<&]>]]]></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc><!-- a comment --></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc><!-- a comment ->--></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e "">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (foo)>\r
+<!ELEMENT foo (#PCDATA)>\r
+<!ENTITY e "<foo></foo>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (foo*)>\r
+<!ELEMENT foo (#PCDATA)>\r
+]>\r
+<doc><foo/><foo></foo></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (foo*)>\r
+<!ELEMENT foo EMPTY>\r
+]>\r
+<doc><foo/><foo></foo></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (foo*)>\r
+<!ELEMENT foo ANY>\r
+]>\r
+<doc><foo/><foo></foo></doc>\r
--- /dev/null
+<?xml version="1.0"?>\r
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<?xml version='1.0'?>\r
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<?xml version = "1.0"?>\r
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<?xml version='1.0' encoding="UTF-8"?>\r
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<?xml version='1.0' standalone='yes'?>\r
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<?xml version='1.0' encoding="UTF-8" standalone='yes'?>\r
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc/>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc />\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
+<?pi data?>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
+<!-- comment -->\r
+\r
--- /dev/null
+<!-- comment -->\r
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
+\r
--- /dev/null
+<?pi data?>\r
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA #IMPLIED>\r
+]>\r
+<doc a1=""<&>'"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA #IMPLIED>\r
+]>\r
+<doc a1="A"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>A</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ATTLIST doc a1 CDATA #IMPLIED>\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc a1="foo\r
+bar"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (e*)>\r
+<!ELEMENT e EMPTY>\r
+<!ATTLIST e a1 CDATA "v1" a2 CDATA "v2" a3 CDATA #IMPLIED>\r
+]>\r
+<doc>\r
+<e a3="v3"/>\r
+<e a1="w1"/>\r
+<e a2="w2" a3="v3"/>\r
+</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA "v1">\r
+<!ATTLIST doc a1 CDATA "z1">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA "v1">\r
+<!ATTLIST doc a2 CDATA "v2">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>X\r
+Y</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>]</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>ð€€ô¿½</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<e/>">\r
+<!ELEMENT doc (e)>\r
+<!ELEMENT e EMPTY>\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+\r
+\r
+<doc\r
+></doc\r
+>\r
+\r
+\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<?pi data?>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>A</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (a*)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ATTLIST doc a1 NMTOKENS #IMPLIED>\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc a1=" 1 2 "></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (e*)>\r
+<!ELEMENT e EMPTY>\r
+<!ATTLIST e a1 CDATA #IMPLIED a2 CDATA #IMPLIED a3 CDATA #IMPLIED>\r
+]>\r
+<doc>\r
+<e a1="v1" a2="v2" a3="v3"/>\r
+<e a1="w1" a2="v2"/>\r
+<e a1="v1" a2="w2" a3="v3"/>\r
+</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>X Y</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>£</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>เจมส์</doc>\r
--- /dev/null
+<!DOCTYPE เจมส์ [\r
+<!ELEMENT เจมส์ (#PCDATA)>\r
+]>\r
+<เจมส์></เจมส์>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>𐀀􏿽</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<">\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA #IMPLIED>\r
+<!-- 34 is double quote -->\r
+<!ENTITY e1 """>\r
+]>\r
+<doc a1="&e1;"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc> </doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e " ">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!NOTATION n PUBLIC "whatever">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY % e "<!ELEMENT doc (#PCDATA)>">\r
+%e;\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a ID #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a IDREF #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a IDREFS #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a ENTITY #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a ENTITIES #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a NOTATION (n1|n2) #IMPLIED>\r
+<!NOTATION n1 SYSTEM "http://www.w3.org/">\r
+<!NOTATION n2 SYSTEM "http://www.w3.org/">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a (1|2) #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #REQUIRED>\r
+]>\r
+<doc a="v"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #FIXED "v">\r
+]>\r
+<doc a="v"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #FIXED "v">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (a, b, c)>\r
+<!ELEMENT a (a?)>\r
+<!ELEMENT b (b*)>\r
+<!ELEMENT c (a | b)+>\r
+]>\r
+<doc><a/><b/><c><a/></c></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY % e SYSTEM "e.dtd">\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY % e PUBLIC 'whatever' "e.dtd">\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [<!ELEMENT doc (#PCDATA)>]><doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e "<foo>">\r
+<!ENTITY e "">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e "">\r
+<!ENTITY e "<foo>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "<foo/>">\r
+<!ELEMENT doc (foo)>\r
+<!ELEMENT foo EMPTY>\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e "<foo>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e "𐀀􏿽">\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ATTLIST e a NOTATION (n) #IMPLIED>\r
+<!ELEMENT doc (e)*>\r
+<!ELEMENT e (#PCDATA)>\r
+<!NOTATION n PUBLIC "whatever">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!NOTATION n SYSTEM "http://www.w3.org/">\r
+<!ENTITY e SYSTEM "http://www.w3.org/" NDATA n>\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a ENTITY "e">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (a)*>\r
+<!ELEMENT a EMPTY>\r
+]>\r
+<doc>\r
+<a/>\r
+ <a/> <a/>\r
+\r
+\r
+</doc>\r
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>
+
+
+</doc>
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY % e "foo">\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a1 CDATA "%e;">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ATTLIST doc a1 CDATA #IMPLIED>\r
+<!ATTLIST doc a1 NMTOKENS #IMPLIED>\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc a1="1 2"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ATTLIST doc a1 NMTOKENS " 1 2 ">\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!ATTLIST doc a2 CDATA #IMPLIED>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY % e SYSTEM "097.ent">\r
+<!ATTLIST doc a1 CDATA "v1">\r
+%e;\r
+<!ATTLIST doc a2 CDATA "v2">\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc><?pi x\r
+y?></doc>\r
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>\r
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ENTITY e PUBLIC ";!*#@$_%" "100.xml">\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e """>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #IMPLIED>\r
+]>\r
+<doc a="""></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc><doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #IMPLIED>\r
+]>\r
+<doc a="x y"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #IMPLIED>\r
+]>\r
+<doc a="x	y"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #IMPLIED>\r
+]>\r
+<doc a="x y"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #IMPLIED>\r
+]>\r
+<doc a="x y"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e "\r
+">\r
+<!ATTLIST doc a CDATA #IMPLIED>\r
+]>\r
+<doc a="x&e;y"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a CDATA #IMPLIED>\r
+]>\r
+<doc a=""></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e " ">\r
+<!ATTLIST doc a CDATA #IMPLIED>\r
+]>\r
+<doc a="x&e;y"></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST doc a NMTOKENS #IMPLIED>\r
+]>\r
+<doc a=" x  y "></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (a | b)>\r
+<!ELEMENT a (#PCDATA)>\r
+]>\r
+<doc><a></a></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ATTLIST e a CDATA #IMPLIED>\r
+]>\r
+<doc></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e "<![CDATA[&foo;]]>">\r
+]>\r
+<doc>&e;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY e1 "&e2;">\r
+<!ENTITY e2 "v">\r
+]>\r
+<doc>&e1;</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+]>\r
+<doc><![CDATA[\r
+]]></doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY rsqb "]">\r
+]>\r
+<doc>]</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc (#PCDATA)>\r
+<!ENTITY rsqb "]]">\r
+]>\r
+<doc>]</doc>\r
--- /dev/null
+<!DOCTYPE doc [\r
+<!ELEMENT doc ANY>\r
+]>\r
+<doc><!-- -á --></doc>\r
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc>&<>"'</doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
--- /dev/null
+<doc :="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc _.-0123456789="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc abcdefghijklmnopqrstuvwxyz="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc ABCDEFGHIJKLMNOPQRSTUVWXYZ="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc><?pi ?></doc>
\ No newline at end of file
--- /dev/null
+<doc><?pi some data ? > <??></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></doc>
\ No newline at end of file
--- /dev/null
+<doc><&</doc>
\ No newline at end of file
--- /dev/null
+<doc><&]>]</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></foo></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc><?pi data?>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<?pi data?><doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1=""<&>'"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="A"></doc>
\ No newline at end of file
--- /dev/null
+<doc>A</doc>
\ No newline at end of file
--- /dev/null
+<doc a1="foo bar"></doc>
\ No newline at end of file
--- /dev/null
+<doc> <e a1="v1" a2="v2" a3="v3"></e> <e a1="w1" a2="v2"></e> <e a1="v1" a2="w2" a3="v3"></e> </doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
--- /dev/null
+<doc>X Y</doc>
\ No newline at end of file
--- /dev/null
+<doc>]</doc>
\ No newline at end of file
--- /dev/null
+<doc>£</doc>
\ No newline at end of file
--- /dev/null
+<doc>เจมส์</doc>
\ No newline at end of file
--- /dev/null
+<เจมส์></เจมส์>
\ No newline at end of file
--- /dev/null
+<doc>ð€€ô¿½</doc>
\ No newline at end of file
--- /dev/null
+<doc><e></e></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<?pi data?><doc></doc>
\ No newline at end of file
--- /dev/null
+<doc>A</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="1 2"></doc>
\ No newline at end of file
--- /dev/null
+<doc> <e a1="v1" a2="v2" a3="v3"></e> <e a1="w1" a2="v2"></e> <e a1="v1" a2="w2" a3="v3"></e> </doc>
\ No newline at end of file
--- /dev/null
+<doc>X Y</doc>
\ No newline at end of file
--- /dev/null
+<doc>£</doc>
\ No newline at end of file
--- /dev/null
+<doc>เจมส์</doc>
\ No newline at end of file
--- /dev/null
+<เจมส์></เจมส์>
\ No newline at end of file
--- /dev/null
+<doc>ð€€ô¿½</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="""></doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [
+<!NOTATION n PUBLIC 'whatever'>
+]>
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [
+<!NOTATION n1 SYSTEM 'http://www.w3.org/'>
+<!NOTATION n2 SYSTEM 'http://www.w3.org/'>
+]>
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a="v"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="v"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="v"></doc>
\ No newline at end of file
--- /dev/null
+<doc><a></a><b></b><c><a></a></c></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></foo></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></doc>
\ No newline at end of file
--- /dev/null
+<doc>ð€€ô¿½ô¿¿</doc>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [
+<!NOTATION n PUBLIC 'whatever'>
+]>
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [
+<!NOTATION n SYSTEM 'http://www.w3.org/'>
+]>
+<doc a="e"></doc>
\ No newline at end of file
--- /dev/null
+<doc> <a></a> <a></a>	<a></a> </doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc a1="%e;"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="1 2"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="1 2"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc><?pi x
+y?></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a="""></doc>
\ No newline at end of file
--- /dev/null
+<doc><doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x	y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a=""></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc><a></a></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc>&foo;</doc>
\ No newline at end of file
--- /dev/null
+<doc>v</doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc>]</doc>
\ No newline at end of file
--- /dev/null
+<doc>]]</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ @(#)xmltest.xml 1.12 99/02/12
+ Copyright 1998-1999 by Sun Microsystems, Inc.
+ All Rights Reserved.
+-->
+
+<TESTCASES PROFILE="James Clark XMLTEST cases, 18-Nov-1998">
+
+<!-- Start: not-wf/sa -->
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-001"
+ URI="not-wf/sa/001.xml" SECTIONS="3.1 [41]">
+ Attribute values must start with attribute names, not "?". </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-002"
+ URI="not-wf/sa/002.xml" SECTIONS="2.3 [4]">
+ Names may not start with "."; it's not a Letter. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-003"
+ URI="not-wf/sa/003.xml" SECTIONS="2.6 [16]">
+ Processing Instruction target name is required.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-004"
+ URI="not-wf/sa/004.xml" SECTIONS="2.6 [16]">
+ SGML-ism: processing instructions end in '?>' not '>'. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-005"
+ URI="not-wf/sa/005.xml" SECTIONS="2.6 [16]">
+ Processing instructions end in '?>' not '?'. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-006"
+ URI="not-wf/sa/006.xml" SECTIONS="2.5 [16]">
+ XML comments may not contain "--" </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-007"
+ URI="not-wf/sa/007.xml" SECTIONS="4.1 [68]">
+ General entity references have no whitespace after the
+ entity name and before the semicolon. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-008"
+ URI="not-wf/sa/008.xml" SECTIONS="2.3 [5]">
+ Entity references must include names, which don't begin
+ with '.' (it's not a Letter or other name start character). </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-009"
+ URI="not-wf/sa/009.xml" SECTIONS="4.1 [66]">
+ Character references may have only decimal or numeric strings.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-010"
+ URI="not-wf/sa/010.xml" SECTIONS="4.1 [68]">
+ Ampersand may only appear as part of a general entity reference.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-011"
+ URI="not-wf/sa/011.xml" SECTIONS="3.1 [41]">
+ SGML-ism: attribute values must be explicitly assigned a
+ value, it can't act as a boolean toggle. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-012"
+ URI="not-wf/sa/012.xml" SECTIONS="2.3 [10]">
+ SGML-ism: attribute values must be quoted in all cases. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-013"
+ URI="not-wf/sa/013.xml" SECTIONS="2.3 [10]">
+ The quotes on both ends of an attribute value must match. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-014"
+ URI="not-wf/sa/014.xml" SECTIONS="2.3 [10]">
+ Attribute values may not contain literal '<' characters. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-015"
+ URI="not-wf/sa/015.xml" SECTIONS="3.1 [41]">
+ Attribute values need a value, not just an equals sign. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-016"
+ URI="not-wf/sa/016.xml" SECTIONS="3.1 [41]">
+ Attribute values need an associated name.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-017"
+ URI="not-wf/sa/017.xml" SECTIONS="2.7 [18]">
+ CDATA sections need a terminating ']]>'. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-018"
+ URI="not-wf/sa/018.xml" SECTIONS="2.7 [19]">
+ CDATA sections begin with a literal '<![CDATA[', no space.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-019"
+ URI="not-wf/sa/019.xml" SECTIONS="3.1 [42]">
+ End tags may not be abbreviated as '</>'.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-020"
+ URI="not-wf/sa/020.xml" SECTIONS="2.3 [10]">
+ Attribute values may not contain literal '&'
+ characters except as part of an entity reference. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-021"
+ URI="not-wf/sa/021.xml" SECTIONS="2.3 [10]">
+ Attribute values may not contain literal '&'
+ characters except as part of an entity reference. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-022"
+ URI="not-wf/sa/022.xml" SECTIONS="4.1 [66]">
+ Character references end with semicolons, always!</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-023"
+ URI="not-wf/sa/023.xml" SECTIONS="2.3 [5]">
+ Digits are not valid name start characters. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-024"
+ URI="not-wf/sa/024.xml" SECTIONS="2.3 [5]">
+ Digits are not valid name start characters. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-025"
+ URI="not-wf/sa/025.xml" SECTIONS="2.4 [14]">
+ Text may not contain a literal ']]>' sequence. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-026"
+ URI="not-wf/sa/026.xml" SECTIONS="2.4 [14]">
+ Text may not contain a literal ']]>' sequence. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-027"
+ URI="not-wf/sa/027.xml" SECTIONS="2.5 [15]">
+ Comments must be terminated with "-->".</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-028"
+ URI="not-wf/sa/028.xml" SECTIONS="2.6 [16]">
+ Processing instructions must end with '?>'. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-029"
+ URI="not-wf/sa/029.xml" SECTIONS="2.4 [14]">
+ Text may not contain a literal ']]>' sequence. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-030"
+ URI="not-wf/sa/030.xml" SECTIONS="2.2 [2]">
+ A form feed is not a legal XML character. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-031"
+ URI="not-wf/sa/031.xml" SECTIONS="2.2 [2]">
+ A form feed is not a legal XML character. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-032"
+ URI="not-wf/sa/032.xml" SECTIONS="2.2 [2]">
+ A form feed is not a legal XML character. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-033"
+ URI="not-wf/sa/033.xml" SECTIONS="2.2 [2]">
+ An ESC (octal 033) is not a legal XML character. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-034"
+ URI="not-wf/sa/034.xml" SECTIONS="2.2 [2]">
+ A form feed is not a legal XML character. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-035"
+ URI="not-wf/sa/035.xml" SECTIONS="3.1 [43]">
+ The '<' character is a markup delimiter and must
+ start an element, CDATA section, PI, or comment. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-036"
+ URI="not-wf/sa/036.xml" SECTIONS="2.8 [27]">
+ Text may not appear after the root element. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-037"
+ URI="not-wf/sa/037.xml" SECTIONS="2.8 [27]">
+ Character references may not appear after the root element. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-038"
+ URI="not-wf/sa/038.xml" SECTIONS="3.1">
+ Tests the "Unique Att Spec" WF constraint by providing
+ multiple values for an attribute.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-039"
+ URI="not-wf/sa/039.xml" SECTIONS="3">
+ Tests the Element Type Match WFC - end tag name must
+ match start tag name.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-040"
+ URI="not-wf/sa/040.xml" SECTIONS="2.8 [27]">
+ Provides two document elements.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-041"
+ URI="not-wf/sa/041.xml" SECTIONS="2.8 [27]">
+ Provides two document elements.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-042"
+ URI="not-wf/sa/042.xml" SECTIONS="3.1 [42]">
+ Invalid End Tag </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-043"
+ URI="not-wf/sa/043.xml" SECTIONS="2.8 [27]">
+ Provides #PCDATA text after the document element. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-044"
+ URI="not-wf/sa/044.xml" SECTIONS="2.8 [27]">
+ Provides two document elements.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-045"
+ URI="not-wf/sa/045.xml" SECTIONS="3.1 [44]">
+ Invalid Empty Element Tag </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-046"
+ URI="not-wf/sa/046.xml" SECTIONS="3.1 [40]">
+ This start (or empty element) tag was not terminated correctly. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-047"
+ URI="not-wf/sa/047.xml" SECTIONS="3.1 [44]">
+ Invalid empty element tag invalid whitespace </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-048"
+ URI="not-wf/sa/048.xml" SECTIONS="2.8 [27]">
+ Provides a CDATA section after the root element.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-049"
+ URI="not-wf/sa/049.xml" SECTIONS="3.1 [40]">
+ Missing start tag </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-050"
+ URI="not-wf/sa/050.xml" SECTIONS="2.1 [1]">
+ Empty document, with no root element. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-051"
+ URI="not-wf/sa/051.xml" SECTIONS="2.7 [18]">
+ CDATA is invalid at top level of document.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-052"
+ URI="not-wf/sa/052.xml" SECTIONS="4.1 [66]">
+ Invalid character reference. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-053"
+ URI="not-wf/sa/053.xml" SECTIONS="3.1 [42]">
+ End tag does not match start tag. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-054"
+ URI="not-wf/sa/054.xml" SECTIONS="4.2.2 [75]">
+ PUBLIC requires two literals.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-055"
+ URI="not-wf/sa/055.xml" SECTIONS="2.8 [28]">
+ Invalid Document Type Definition format. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-056"
+ URI="not-wf/sa/056.xml" SECTIONS="2.8 [28]">
+ Invalid Document Type Definition format - misplaced comment. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-057"
+ URI="not-wf/sa/057.xml" SECTIONS="3.2 [45]">
+ This isn't SGML; comments can't exist in declarations. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-058"
+ URI="not-wf/sa/058.xml" SECTIONS="3.3.1 [54]">
+ Invalid character , in ATTLIST enumeration </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-059"
+ URI="not-wf/sa/059.xml" SECTIONS="3.3.1 [59]">
+ String literal must be in quotes. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-060"
+ URI="not-wf/sa/060.xml" SECTIONS="3.3.1 [56]">
+ Invalid type NAME defined in ATTLIST.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-061"
+ URI="not-wf/sa/061.xml" SECTIONS="4.2.2 [75]">
+ External entity declarations require whitespace between public
+ and system IDs.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-062"
+ URI="not-wf/sa/062.xml" SECTIONS="4.2 [71]">
+ Entity declarations need space after the entity name. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-063"
+ URI="not-wf/sa/063.xml" SECTIONS="2.8 [29]">
+ Conditional sections may only appear in the external
+ DTD subset. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-064"
+ URI="not-wf/sa/064.xml" SECTIONS="3.3 [53]">
+ Space is required between attribute type and default values
+ in <!ATTLIST...> declarations. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-065"
+ URI="not-wf/sa/065.xml" SECTIONS="3.3 [53]">
+ Space is required between attribute name and type
+ in <!ATTLIST...> declarations. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-066"
+ URI="not-wf/sa/066.xml" SECTIONS="3.3 [52]">
+ Required whitespace is missing. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-067"
+ URI="not-wf/sa/067.xml" SECTIONS="3.3 [53]">
+ Space is required between attribute type and default values
+ in <!ATTLIST...> declarations. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-068"
+ URI="not-wf/sa/068.xml" SECTIONS="3.3.1 [58]">
+ Space is required between NOTATION keyword and list of
+ enumerated choices in <!ATTLIST...> declarations. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-069"
+ URI="not-wf/sa/069.xml" SECTIONS="4.2.2 [76]">
+ Space is required before an NDATA entity annotation.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-070"
+ URI="not-wf/sa/070.xml" SECTIONS="2.5 [16]">
+ XML comments may not contain "--" </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-071"
+ URI="not-wf/sa/071.xml" SECTIONS="4.1 [68]">
+ ENTITY can't reference itself directly or indirectly.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-072"
+ URI="not-wf/sa/072.xml" SECTIONS="4.1 [68]">
+ Undefined ENTITY foo. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-073"
+ URI="not-wf/sa/073.xml" SECTIONS="4.1 [68]">
+ Undefined ENTITY f. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-074"
+ URI="not-wf/sa/074.xml" SECTIONS="4.3.2">
+ Internal general parsed entities are only well formed if
+ they match the "content" production. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-075"
+ URI="not-wf/sa/075.xml" SECTIONS="4.1 [68]">
+ ENTITY can't reference itself directly or indirectly. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-076"
+ URI="not-wf/sa/076.xml" SECTIONS="4.1 [68]">
+ Undefined ENTITY foo. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-077"
+             URI="not-wf/sa/077.xml" SECTIONS="4.1 [68]">
+ Undefined ENTITY bar. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-078"
+ URI="not-wf/sa/078.xml" SECTIONS="4.1 [68]">
+ Undefined ENTITY foo. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-079"
+ URI="not-wf/sa/079.xml" SECTIONS="4.1 [68]">
+ ENTITY can't reference itself directly or indirectly. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-080"
+ URI="not-wf/sa/080.xml" SECTIONS="4.1 [68]">
+ ENTITY can't reference itself directly or indirectly. </TEST>
+<TEST TYPE="not-wf" ENTITIES="general" ID="not-wf-sa-081"
+ URI="not-wf/sa/081.xml" SECTIONS="3.1">
+ This tests the <EM>No External Entity References</EM> WFC,
+ since the entity is referred to within an attribute. </TEST>
+<TEST TYPE="not-wf" ENTITIES="general" ID="not-wf-sa-082"
+ URI="not-wf/sa/082.xml" SECTIONS="3.1">
+ This tests the <EM>No External Entity References</EM> WFC,
+ since the entity is referred to within an attribute. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-083"
+ URI="not-wf/sa/083.xml" SECTIONS="4.2.2 [76]">
+ Undefined NOTATION n. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-084"
+ URI="not-wf/sa/084.xml" SECTIONS="4.1">
+ Tests the <EM>Parsed Entity</EM> WFC by referring to an
+ unparsed entity. (This precedes the error of not declaring
+ that entity's notation, which may be detected any time before
+ the DTD parsing is completed.) </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-085"
+ URI="not-wf/sa/085.xml" SECTIONS="2.3 [13]">
+ Public IDs may not contain "[". </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-086"
+ URI="not-wf/sa/086.xml" SECTIONS="2.3 [13]">
+ Public IDs may not contain "[". </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-087"
+ URI="not-wf/sa/087.xml" SECTIONS="2.3 [13]">
+ Public IDs may not contain "[". </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-088"
+ URI="not-wf/sa/088.xml" SECTIONS="2.3 [10]">
+ Attribute values are terminated by literal quote characters,
+ and any entity expansion is done afterwards. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-089"
+ URI="not-wf/sa/089.xml" SECTIONS="4.2 [74]">
+ Parameter entities "are" always parsed; NDATA annotations
+ are not permitted.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-090"
+ URI="not-wf/sa/090.xml" SECTIONS="2.3 [10]">
+ Attributes may not contain a literal "<" character;
+ this one has one because of reference expansion. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-091"
+ URI="not-wf/sa/091.xml" SECTIONS="4.2 [74]">
+ Parameter entities "are" always parsed; NDATA annotations
+ are not permitted.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-092"
+ URI="not-wf/sa/092.xml" SECTIONS="4.5">
+ The replacement text of this entity has an illegal reference,
+ because the character reference is expanded immediately. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-093"
+ URI="not-wf/sa/093.xml" SECTIONS="4.1 [66]">
+ Hexadecimal character references may not use the uppercase 'X'.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-094"
+ URI="not-wf/sa/094.xml" SECTIONS="2.8 [24]">
+ Prolog VERSION must be lowercase. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-095"
+ URI="not-wf/sa/095.xml" SECTIONS="2.8 [23]">
+ VersionInfo must come before EncodingDecl. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-096"
+ URI="not-wf/sa/096.xml" SECTIONS="2.9 [32]">
+ Space is required before the standalone declaration. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-097"
+ URI="not-wf/sa/097.xml" SECTIONS="2.8 [24]">
+ Both quotes surrounding VersionNum must be the same. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-098"
+ URI="not-wf/sa/098.xml" SECTIONS="2.8 [23]">
+ Only one "version=..." string may appear in an XML declaration.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-099"
+ URI="not-wf/sa/099.xml" SECTIONS="2.8 [23]">
+ Only three pseudo-attributes are in the XML declaration,
+ and "valid=..." is not one of them. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-100"
+ URI="not-wf/sa/100.xml" SECTIONS="2.9 [32]">
+ Only "yes" and "no" are permitted as values of "standalone". </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-101"
+ URI="not-wf/sa/101.xml" SECTIONS="4.3.3 [81]">
+ Space is not permitted in an encoding name. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-102"
+ URI="not-wf/sa/102.xml" SECTIONS="2.8 [26]">
+ Provides an illegal XML version number; spaces are illegal.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-103"
+ URI="not-wf/sa/103.xml" SECTIONS="4.3.2">
+ End-tag required for element foo. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-104"
+ URI="not-wf/sa/104.xml" SECTIONS="4.3.2">
+ Internal general parsed entities are only well formed if
+ they match the "content" production. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-105"
+ URI="not-wf/sa/105.xml" SECTIONS="2.7 ">
+ Invalid placement of CDATA section. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-106"
+ URI="not-wf/sa/106.xml" SECTIONS="4.2">
+ Invalid placement of entity declaration. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-107"
+ URI="not-wf/sa/107.xml" SECTIONS="2.8 [28]">
+ Invalid document type declaration. CDATA alone is invalid.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-108"
+ URI="not-wf/sa/108.xml" SECTIONS="2.7 [19]">
+ No space in '<![CDATA['.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-109"
+ URI="not-wf/sa/109.xml" SECTIONS="4.2 [70]">
+ Tags invalid within EntityDecl. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-110"
+ URI="not-wf/sa/110.xml" SECTIONS="4.1 [68]">
+ Entity reference must be in content of element. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-111"
+ URI="not-wf/sa/111.xml" SECTIONS="3.1 [43]">
+    Entity reference must be in content of element, not Start-tag. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-112"
+ URI="not-wf/sa/112.xml" SECTIONS="2.7 [19]">
+ CDATA sections start '<![CDATA[', not '<!cdata['.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-113"
+ URI="not-wf/sa/113.xml" SECTIONS="2.3 [9]">
+ Parameter entity values must use valid reference syntax;
+ this reference is malformed.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-114"
+ URI="not-wf/sa/114.xml" SECTIONS="2.3 [9]">
+ General entity values must use valid reference syntax;
+ this reference is malformed.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-115"
+ URI="not-wf/sa/115.xml" SECTIONS="4.5">
+ The replacement text of this entity is an illegal character
+ reference, which must be rejected when it is parsed in the
+ context of an attribute value.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-116"
+ URI="not-wf/sa/116.xml" SECTIONS="4.3.2">
+ Internal general parsed entities are only well formed if
+ they match the "content" production. This is a partial
+ character reference, not a full one. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-117"
+ URI="not-wf/sa/117.xml" SECTIONS="4.3.2">
+ Internal general parsed entities are only well formed if
+ they match the "content" production. This is a partial
+ character reference, not a full one. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-118"
+ URI="not-wf/sa/118.xml" SECTIONS="4.1 [68]">
+ Entity reference expansion is not recursive.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-119"
+ URI="not-wf/sa/119.xml" SECTIONS="4.3.2">
+ Internal general parsed entities are only well formed if
+ they match the "content" production. This is a partial
+ character reference, not a full one. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-120"
+ URI="not-wf/sa/120.xml" SECTIONS="4.5">
+ Character references are expanded in the replacement text of
+ an internal entity, which is then parsed as usual. Accordingly,
+ & must be doubly quoted - encoded either as <EM>&amp;</EM>
+ or as <EM>&#38;#38;</EM>. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-121"
+ URI="not-wf/sa/121.xml" SECTIONS="4.1 [68]">
+ A name of an ENTITY was started with an invalid character. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-122"
+ URI="not-wf/sa/122.xml" SECTIONS="3.2.1 [47]">
+ Invalid syntax mixed connectors are used. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-123"
+ URI="not-wf/sa/123.xml" SECTIONS="3.2.1 [48]">
+ Invalid syntax mismatched parenthesis. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-124"
+ URI="not-wf/sa/124.xml" SECTIONS="3.2.2 [51]">
+ Invalid format of Mixed-content declaration. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-125"
+ URI="not-wf/sa/125.xml" SECTIONS="3.2.2 [51]">
+ Invalid syntax extra set of parenthesis not necessary. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-126"
+ URI="not-wf/sa/126.xml" SECTIONS="3.2.2 [51]">
+ Invalid syntax Mixed-content must be defined as zero or more. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-127"
+ URI="not-wf/sa/127.xml" SECTIONS="3.2.2 [51]">
+ Invalid syntax Mixed-content must be defined as zero or more. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-128"
+ URI="not-wf/sa/128.xml" SECTIONS="2.7 [18]">
+ Invalid CDATA syntax. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-129"
+ URI="not-wf/sa/129.xml" SECTIONS="3.2 [45]">
+ Invalid syntax for Element Type Declaration. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-130"
+ URI="not-wf/sa/130.xml" SECTIONS="3.2 [45]">
+ Invalid syntax for Element Type Declaration. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-131"
+ URI="not-wf/sa/131.xml" SECTIONS="3.2 [45]">
+ Invalid syntax for Element Type Declaration. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-132"
+ URI="not-wf/sa/132.xml" SECTIONS="3.2.1 [50]">
+ Invalid syntax mixed connectors used. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-133"
+ URI="not-wf/sa/133.xml" SECTIONS="3.2.1">
+ Illegal whitespace before optional character causes syntax error. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-134"
+ URI="not-wf/sa/134.xml" SECTIONS="3.2.1">
+ Illegal whitespace before optional character causes syntax error. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-135"
+ URI="not-wf/sa/135.xml" SECTIONS="3.2.1 [47]">
+ Invalid character used as connector. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-136"
+ URI="not-wf/sa/136.xml" SECTIONS="3.2 [45]">
+ Tag omission is invalid in XML. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-137"
+ URI="not-wf/sa/137.xml" SECTIONS="3.2 [45]">
+ Space is required before a content model. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-138"
+ URI="not-wf/sa/138.xml" SECTIONS="3.2.1 [48]">
+ Invalid syntax for content particle. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-139"
+ URI="not-wf/sa/139.xml" SECTIONS="3.2.1 [46]">
+ The element-content model should not be empty. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-140"
+ URI="not-wf/sa/140.xml" SECTIONS="2.3 [4]"
+ EDITION="1 2 3 4">
+ Character '&#x309a;' is a CombiningChar, not a
+ Letter, and so may not begin a name.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-141"
+ URI="not-wf/sa/141.xml" SECTIONS="2.3 [5]"
+ EDITION="1 2 3 4">
+ Character #x0E5C is not legal in XML names. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-142"
+ URI="not-wf/sa/142.xml" SECTIONS="2.2 [2]">
+ Character #x0000 is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-143"
+ URI="not-wf/sa/143.xml" SECTIONS="2.2 [2]">
+ Character #x001F is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-144"
+ URI="not-wf/sa/144.xml" SECTIONS="2.2 [2]">
+ Character #xFFFF is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-145"
+ URI="not-wf/sa/145.xml" SECTIONS="2.2 [2]">
+ Character #xD800 is not legal anywhere in an XML document. (If it
+ appeared in a UTF-16 surrogate pair, it'd represent half of a UCS-4
+ character and so wouldn't really be in the document.) </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-146"
+ URI="not-wf/sa/146.xml" SECTIONS="2.2 [2]">
+ Character references must also refer to legal XML characters;
+ #x00110000 is one more than the largest legal character.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-147"
+ URI="not-wf/sa/147.xml" SECTIONS="2.8 [22]">
+ XML Declaration may not be preceded by whitespace.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-148"
+ URI="not-wf/sa/148.xml" SECTIONS="2.8 [22]">
+ XML Declaration may not be preceded by comments or whitespace.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-149"
+ URI="not-wf/sa/149.xml" SECTIONS="2.8 [28]">
+ XML Declaration may not be within a DTD.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-150"
+ URI="not-wf/sa/150.xml" SECTIONS="3.1 [43]">
+ XML declarations may not be within element content. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-151"
+ URI="not-wf/sa/151.xml" SECTIONS="2.8 [27]">
+ XML declarations may not follow document content.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-152"
+ URI="not-wf/sa/152.xml" SECTIONS="2.8 [22]">
+ XML declarations must include the "version=..." string.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-153"
+ URI="not-wf/sa/153.xml" SECTIONS="4.3.2">
+ Text declarations may not begin internal parsed entities;
+ they may only appear at the beginning of external parsed
+ (parameter or general) entities. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-154"
+ URI="not-wf/sa/154.xml" SECTIONS="2.8 2.6 [23, 17]">
+ '<?XML ...?>' is neither an XML declaration
+ nor a legal processing instruction target name. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-155"
+ URI="not-wf/sa/155.xml" SECTIONS="2.8 2.6 [23, 17]">
+ '<?xmL ...?>' is neither an XML declaration
+ nor a legal processing instruction target name. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-156"
+ URI="not-wf/sa/156.xml" SECTIONS="2.8 2.6 [23, 17]">
+ '<?xMl ...?>' is neither an XML declaration
+ nor a legal processing instruction target name. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-157"
+ URI="not-wf/sa/157.xml" SECTIONS="2.6 [17]">
+ '<?xmL ...?>' is not a legal processing instruction
+ target name. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-158"
+ URI="not-wf/sa/158.xml" SECTIONS="3.3 [52]">
+ SGML-ism: "#NOTATION gif" can't have attributes. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-159"
+ URI="not-wf/sa/159.xml" SECTIONS="2.3 [9]">
+ Uses '&' unquoted in an entity declaration,
+ which is illegal syntax for an entity reference.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-160"
+ URI="not-wf/sa/160.xml" SECTIONS="2.8">
+ Violates the <EM>PEs in Internal Subset</EM> WFC
+ by using a PE reference within a declaration. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-161"
+ URI="not-wf/sa/161.xml" SECTIONS="2.8">
+ Violates the <EM>PEs in Internal Subset</EM> WFC
+ by using a PE reference within a declaration. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-162"
+ URI="not-wf/sa/162.xml" SECTIONS="2.8">
+ Violates the <EM>PEs in Internal Subset</EM> WFC
+ by using a PE reference within a declaration. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-163"
+ URI="not-wf/sa/163.xml" SECTIONS="4.1 [69]">
+ Invalid placement of Parameter entity reference. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-164"
+ URI="not-wf/sa/164.xml" SECTIONS="4.1 [69]">
+ Invalid placement of Parameter entity reference. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-165"
+ URI="not-wf/sa/165.xml" SECTIONS="4.2 [72]">
+ Parameter entity declarations must have a space before
+ the '%'. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-166"
+ URI="not-wf/sa/166.xml" SECTIONS="2.2 [2]">
+ Character FFFF is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-167"
+ URI="not-wf/sa/167.xml" SECTIONS="2.2 [2]">
+ Character FFFE is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-168"
+ URI="not-wf/sa/168.xml" SECTIONS="2.2 [2]">
+ An unpaired surrogate (D800) is not legal anywhere
+ in an XML document.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-169"
+ URI="not-wf/sa/169.xml" SECTIONS="2.2 [2]">
+ An unpaired surrogate (DC00) is not legal anywhere
+ in an XML document.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-170"
+ URI="not-wf/sa/170.xml" SECTIONS="2.2 [2]">
+ Four byte UTF-8 encodings can encode UCS-4 characters
+ which are beyond the range of legal XML characters
+ (and can't be expressed in Unicode surrogate pairs).
+ This document holds such a character. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-171"
+ URI="not-wf/sa/171.xml" SECTIONS="2.2 [2]">
+ Character FFFF is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-172"
+ URI="not-wf/sa/172.xml" SECTIONS="2.2 [2]">
+ Character FFFF is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-173"
+ URI="not-wf/sa/173.xml" SECTIONS="2.2 [2]">
+ Character FFFF is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-174"
+ URI="not-wf/sa/174.xml" SECTIONS="2.2 [2]">
+ Character FFFF is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-175"
+ URI="not-wf/sa/175.xml" SECTIONS="2.2 [2]">
+ Character FFFF is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-176"
+ URI="not-wf/sa/176.xml" SECTIONS="3 [39]">
+ Start tags must have matching end tags.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-177"
+ URI="not-wf/sa/177.xml" SECTIONS="2.2 [2]">
+ Character FFFF is not legal anywhere in an XML document. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-178"
+ URI="not-wf/sa/178.xml" SECTIONS="3.1 [41]">
+ Invalid syntax matching double quote is missing. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-179"
+ URI="not-wf/sa/179.xml" SECTIONS="4.1 [66]">
+ Invalid syntax matching double quote is missing. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-180"
+ URI="not-wf/sa/180.xml" SECTIONS="4.1">
+ The <EM>Entity Declared</EM> WFC requires entities to be declared
+ before they are used in an attribute list declaration. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-181"
+ URI="not-wf/sa/181.xml" SECTIONS="4.3.2">
+ Internal parsed entities must match the <EM>content</EM>
+ production to be well formed. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-182"
+ URI="not-wf/sa/182.xml" SECTIONS="4.3.2">
+ Internal parsed entities must match the <EM>content</EM>
+ production to be well formed. </TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-183"
+ URI="not-wf/sa/183.xml" SECTIONS="3.2.2 [51]">
+ Mixed content declarations may not include content particles.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-184"
+ URI="not-wf/sa/184.xml" SECTIONS="3.2.2 [51]">
+ In mixed content models, element names must not be
+ parenthesized. </TEST>
+<TEST TYPE="not-wf" ENTITIES="parameter" ID="not-wf-sa-185"
+ URI="not-wf/sa/185.xml" SECTIONS="4.1">
+ Tests the <EM>Entity Declared</EM> WFC.
+ <EM>Note:</EM> a nonvalidating parser is permitted not to report
+ this WFC violation, since it would need to read an external
+ parameter entity to distinguish it from a violation of
+ the <EM>Standalone Declaration</EM> VC.</TEST>
+<TEST TYPE="not-wf" ENTITIES="none" ID="not-wf-sa-186"
+ URI="not-wf/sa/186.xml" SECTIONS="3.1 [44]">
+ Whitespace is required between attribute/value pairs. </TEST>
+
+<!-- Start: not-wf/not-sa -->
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-not-sa-001"
+ URI="not-wf/not-sa/001.xml" SECTIONS="3.4 [62]">
+ Conditional sections must be properly terminated ("]>" used
+ instead of "]]>"). </TEST>
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-not-sa-002"
+ URI="not-wf/not-sa/002.xml" SECTIONS="2.6 [17]">
+ Processing instruction target names may not be "XML"
+ in any combination of cases. </TEST>
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-not-sa-003"
+ URI="not-wf/not-sa/003.xml" SECTIONS="3.4 [62]">
+ Conditional sections must be properly terminated ("]]>" omitted). </TEST>
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-not-sa-004"
+ URI="not-wf/not-sa/004.xml" SECTIONS="3.4 [62]">
+ Conditional sections must be properly terminated ("]]>" omitted). </TEST>
+<TEST TYPE="error" ENTITIES="both" ID="not-wf-not-sa-005"
+ URI="not-wf/not-sa/005.xml" SECTIONS="4.1">
+ Tests the <EM>Entity Declared</EM> VC by referring to an
+ undefined parameter entity within an external entity.</TEST>
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-not-sa-006"
+ URI="not-wf/not-sa/006.xml" SECTIONS="3.4 [62]">
+ Conditional sections need a '[' after the INCLUDE or IGNORE. </TEST>
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-not-sa-007"
+ URI="not-wf/not-sa/007.xml" SECTIONS="4.3.2 [79]">
+ A <!DOCTYPE ...> declaration may not begin any external
+ entity; it's only found once, in the document entity.</TEST>
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-not-sa-008"
+ URI="not-wf/not-sa/008.xml" SECTIONS="4.1 [69]">
+ In DTDs, the '%' character must be part of a parameter
+ entity reference.</TEST>
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-not-sa-009"
+ URI="not-wf/not-sa/009.xml" SECTIONS="2.8">
+ This test violates WFC:PE Between Declarations in Production 28a.
+ The last character of a markup declaration is not contained in the same
+ parameter-entity text replacement.</TEST>
+<!-- Start: not-wf/ext-sa -->
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-ext-sa-001"
+ URI="not-wf/ext-sa/001.xml" SECTIONS="4.1">
+ Tests the <EM>No Recursion</EM> WFC by having an external general
+ entity be self-recursive.</TEST>
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-ext-sa-002"
+ URI="not-wf/ext-sa/002.xml" SECTIONS="4.3.1 4.3.2 [77, 78]">
+ External entities have "text declarations", which do
+ not permit the "standalone=..." attribute that's allowed
+ in XML declarations.</TEST>
+<TEST TYPE="not-wf" ENTITIES="both" ID="not-wf-ext-sa-003"
+ URI="not-wf/ext-sa/003.xml" SECTIONS="2.6 [17]">
+ Only one text declaration is permitted; a second one
+ looks like an illegal processing instruction (target names
+ of "xml" in any case are not allowed). </TEST>
+
+
+<!-- Start: invalid/ -->
+
+<TEST TYPE="invalid" ENTITIES="both" ID="invalid--002"
+ URI="invalid/002.xml" SECTIONS="3.2.1">
+ Tests the "Proper Group/PE Nesting" validity constraint by
+ fragmenting a content model between two parameter entities.</TEST>
+<TEST TYPE="invalid" ENTITIES="both" ID="invalid--005"
+ URI="invalid/005.xml" SECTIONS="2.8">
+ Tests the "Proper Declaration/PE Nesting" validity constraint by
+ fragmenting an element declaration between two parameter entities.</TEST>
+<TEST TYPE="invalid" ENTITIES="both" ID="invalid--006"
+ URI="invalid/006.xml" SECTIONS="2.8">
+ Tests the "Proper Declaration/PE Nesting" validity constraint by
+ fragmenting an element declaration between two parameter entities.</TEST>
+<TEST TYPE="invalid" ENTITIES="both" ID="invalid-not-sa-022"
+ URI="invalid/not-sa/022.xml" SECTIONS="3.4 [62]"
+ OUTPUT="invalid/not-sa/out/022.xml">
+ Test the "Proper Conditional Section/ PE Nesting" validity constraint. </TEST>
+
+<!-- Start: valid/sa -->
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-001"
+ URI="valid/sa/001.xml" SECTIONS="3.2.2 [51]"
+ OUTPUT="valid/sa/out/001.xml">
+ Test demonstrates an Element Type Declaration with Mixed Content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-002"
+ URI="valid/sa/002.xml" SECTIONS="3.1 [40]"
+ OUTPUT="valid/sa/out/002.xml">
+ Test demonstrates that whitespace is permitted after the tag name in a Start-tag. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-003"
+ URI="valid/sa/003.xml" SECTIONS="3.1 [42]"
+ OUTPUT="valid/sa/out/003.xml">
+ Test demonstrates that whitespace is permitted after the tag name in an End-tag.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-004"
+ URI="valid/sa/004.xml" SECTIONS="3.1 [41]"
+ OUTPUT="valid/sa/out/004.xml">
+ Test demonstrates a valid attribute specification within a Start-tag. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-005"
+ URI="valid/sa/005.xml" SECTIONS="3.1 [40]"
+ OUTPUT="valid/sa/out/005.xml">
+ Test demonstrates a valid attribute specification within a Start-tag that
+contains whitespace on both sides of the equal sign. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-006"
+ URI="valid/sa/006.xml" SECTIONS="3.1 [41]"
+ OUTPUT="valid/sa/out/006.xml">
+    Test demonstrates that the AttValue within a Start-tag can use a single quote as a delimiter. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-007"
+ URI="valid/sa/007.xml" SECTIONS="3.1 4.6 [43]"
+ OUTPUT="valid/sa/out/007.xml">
+ Test demonstrates numeric character references can be used for element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-008"
+ URI="valid/sa/008.xml" SECTIONS="2.4 3.1 [43]"
+ OUTPUT="valid/sa/out/008.xml">
+ Test demonstrates character references can be used for element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-009"
+ URI="valid/sa/009.xml" SECTIONS="2.3 3.1 [43]"
+ OUTPUT="valid/sa/out/009.xml">
+ Test demonstrates that PubidChar can be used for element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-010"
+ URI="valid/sa/010.xml" SECTIONS="3.1 [40]"
+ OUTPUT="valid/sa/out/010.xml">
+ Test demonstrates that whitespace is valid after the Attribute in a Start-tag. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-011"
+ URI="valid/sa/011.xml" SECTIONS="3.1 [40]"
+ OUTPUT="valid/sa/out/011.xml">
+ Test demonstrates multiple Attributes within the Start-tag. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-012"
+ URI="valid/sa/012.xml" SECTIONS="2.3 [4]"
+ OUTPUT="valid/sa/out/012.xml" NAMESPACE="no">
+ Uses a legal XML 1.0 name consisting of a single colon
+ character (disallowed by the latest XML Namespaces draft).</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-013"
+ URI="valid/sa/013.xml" SECTIONS="2.3 3.1 [13] [40]"
+ OUTPUT="valid/sa/out/013.xml">
+ Test demonstrates that the Attribute in a Start-tag can consist of numerals along with special characters. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-014"
+ URI="valid/sa/014.xml" SECTIONS="2.3 3.1 [13] [40]"
+ OUTPUT="valid/sa/out/014.xml">
+ Test demonstrates that all lower case letters are valid for the Attribute in a Start-tag. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-015"
+ URI="valid/sa/015.xml" SECTIONS="2.3 3.1 [13] [40]"
+ OUTPUT="valid/sa/out/015.xml">
+ Test demonstrates that all upper case letters are valid for the Attribute in a Start-tag. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-016"
+ URI="valid/sa/016.xml" SECTIONS="2.6 3.1 [16] [43]"
+ OUTPUT="valid/sa/out/016.xml">
+ Test demonstrates that Processing Instructions are valid element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-017"
+ URI="valid/sa/017.xml" SECTIONS="2.6 3.1 [16] [43]"
+ OUTPUT="valid/sa/out/017.xml">
+ Test demonstrates that Processing Instructions are valid element content and there can be more than one. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-018"
+ URI="valid/sa/018.xml" SECTIONS="2.7 3.1 [18] [43]"
+ OUTPUT="valid/sa/out/018.xml">
+ Test demonstrates that CDATA sections are valid element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-019"
+ URI="valid/sa/019.xml" SECTIONS="2.7 3.1 [18] [43]"
+ OUTPUT="valid/sa/out/019.xml">
+ Test demonstrates that CDATA sections are valid element content and that
+ampersands may occur in their literal form. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-020"
+ URI="valid/sa/020.xml" SECTIONS="2.7 3.1 [18] [43]"
+ OUTPUT="valid/sa/out/020.xml">
+ Test demonstrates that CDATA sections are valid element content and that
+everything between the CDStart and CDEnd is recognized as character data not markup. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-021"
+ URI="valid/sa/021.xml" SECTIONS="2.5 3.1 [15] [43]"
+ OUTPUT="valid/sa/out/021.xml">
+ Test demonstrates that comments are valid element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-022"
+ URI="valid/sa/022.xml" SECTIONS="2.5 3.1 [15] [43]"
+ OUTPUT="valid/sa/out/022.xml">
+ Test demonstrates that comments are valid element content and that all characters before the double-hyphen right angle combination are considered part of the comment. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-023"
+ URI="valid/sa/023.xml" SECTIONS="3.1 [43]"
+ OUTPUT="valid/sa/out/023.xml">
+ Test demonstrates that Entity References are valid element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-024"
+ URI="valid/sa/024.xml" SECTIONS="3.1 4.1 [43] [66]"
+ OUTPUT="valid/sa/out/024.xml">
+ Test demonstrates that Entity References are valid element content and also demonstrates a valid Entity Declaration. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-025"
+ URI="valid/sa/025.xml" SECTIONS="3.2 [46]"
+ OUTPUT="valid/sa/out/025.xml">
+ Test demonstrates an Element Type Declaration and that the contentspec can be of mixed content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-026"
+ URI="valid/sa/026.xml" SECTIONS="3.2 [46]"
+ OUTPUT="valid/sa/out/026.xml">
+ Test demonstrates an Element Type Declaration and that EMPTY is a valid contentspec. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-027"
+ URI="valid/sa/027.xml" SECTIONS="3.2 [46]"
+ OUTPUT="valid/sa/out/027.xml">
+ Test demonstrates an Element Type Declaration and that ANY is a valid contentspec. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-028"
+ URI="valid/sa/028.xml" SECTIONS="2.8 [24]"
+ OUTPUT="valid/sa/out/028.xml">
+ Test demonstrates a valid prolog that uses double quotes as delimiters around the VersionNum. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-029"
+ URI="valid/sa/029.xml" SECTIONS="2.8 [24]"
+ OUTPUT="valid/sa/out/029.xml">
+ Test demonstrates a valid prolog that uses single quotes as delimiters around the VersionNum. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-030"
+ URI="valid/sa/030.xml" SECTIONS="2.8 [25]"
+ OUTPUT="valid/sa/out/030.xml">
+ Test demonstrates a valid prolog that contains whitespace on both sides of the equal sign in the VersionInfo. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-031"
+ URI="valid/sa/031.xml" SECTIONS="4.3.3 [80]"
+ OUTPUT="valid/sa/out/031.xml">
+ Test demonstrates a valid EncodingDecl within the prolog. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-032"
+ URI="valid/sa/032.xml" SECTIONS="2.9 [32]"
+ OUTPUT="valid/sa/out/032.xml">
+ Test demonstrates a valid SDDecl within the prolog. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-033"
+ URI="valid/sa/033.xml" SECTIONS="2.8 [23]"
+ OUTPUT="valid/sa/out/033.xml">
+ Test demonstrates that both a EncodingDecl and SDDecl are valid within the prolog. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-034"
+ URI="valid/sa/034.xml" SECTIONS="3.1 [44]"
+ OUTPUT="valid/sa/out/034.xml">
+ Test demonstrates the correct syntax for an Empty element tag. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-035"
+ URI="valid/sa/035.xml" SECTIONS="3.1 [44]"
+ OUTPUT="valid/sa/out/035.xml">
+ Test demonstrates that whitespace is permissible after the name in an Empty element tag. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-036"
+ URI="valid/sa/036.xml" SECTIONS="2.6 [16]"
+ OUTPUT="valid/sa/out/036.xml">
+ Test demonstrates a valid processing instruction. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-037"
+ URI="valid/sa/037.xml" SECTIONS="2.6 [15]"
+ OUTPUT="valid/sa/out/037.xml">
+ Test demonstrates a valid comment and that it may appear anywhere in the document including at the end. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-038"
+ URI="valid/sa/038.xml" SECTIONS="2.6 [15]"
+ OUTPUT="valid/sa/out/038.xml">
+ Test demonstrates a valid comment and that it may appear anywhere in the document including the beginning. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-039"
+ URI="valid/sa/039.xml" SECTIONS="2.6 [16]"
+ OUTPUT="valid/sa/out/039.xml">
+ Test demonstrates a valid processing instruction and that it may appear at the beginning of the document. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-040"
+ URI="valid/sa/040.xml" SECTIONS="3.3 3.3.1 [52] [54]"
+ OUTPUT="valid/sa/out/040.xml">
+ Test demonstrates an Attribute List declaration that uses a StringType as the AttType. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-041"
+ URI="valid/sa/041.xml" SECTIONS="3.3.1 4.1 [54] [66]"
+ OUTPUT="valid/sa/out/041.xml">
+ Test demonstrates an Attribute List declaration that uses a StringType as the AttType and also expands the CDATA attribute with a character reference. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-042"
+ URI="valid/sa/042.xml" SECTIONS="3.3.1 4.1 [54] [66]"
+ OUTPUT="valid/sa/out/042.xml">
+ Test demonstrates an Attribute List declaration that uses a StringType as the AttType and also expands the CDATA attribute with a character reference. The test also shows that the leading zeros in the character reference are ignored. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-043"
+ URI="valid/sa/043.xml" SECTIONS="3.3"
+ OUTPUT="valid/sa/out/043.xml">
+ An element's attributes may be declared before its content
+ model; and attribute values may contain newlines. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-044"
+ URI="valid/sa/044.xml" SECTIONS="3.1 [44]"
+ OUTPUT="valid/sa/out/044.xml">
+ Test demonstrates that the empty-element tag must be used for elements that are declared EMPTY. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-045"
+ URI="valid/sa/045.xml" SECTIONS="3.3 [52]"
+ OUTPUT="valid/sa/out/045.xml">
+ Tests whether more than one definition can be provided for the same attribute of a given element type with the first declaration being binding. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-046"
+ URI="valid/sa/046.xml" SECTIONS="3.3 [52]"
+ OUTPUT="valid/sa/out/046.xml">
+ Test demonstrates that when more than one AttlistDecl is provided for a given element type, the contents of all those provided are merged. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-047"
+ URI="valid/sa/047.xml" SECTIONS="3.1 [43]"
+ OUTPUT="valid/sa/out/047.xml">
+ Test demonstrates that extra whitespace is normalized into single space character. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-048"
+ URI="valid/sa/048.xml" SECTIONS="2.4 3.1 [14] [43]"
+ OUTPUT="valid/sa/out/048.xml">
+ Test demonstrates that character data is valid element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-049"
+ URI="valid/sa/049.xml" SECTIONS="2.2 [2]"
+ OUTPUT="valid/sa/out/049.xml">
+ Test demonstrates that characters outside of normal ascii range can be used as element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-050"
+ URI="valid/sa/050.xml" SECTIONS="2.2 [2]"
+ OUTPUT="valid/sa/out/050.xml">
+ Test demonstrates that characters outside of normal ascii range can be used as element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-051"
+ URI="valid/sa/051.xml" SECTIONS="2.2 [2]"
+ OUTPUT="valid/sa/out/051.xml">
+ The document is encoded in UTF-16 and uses some name
+ characters well outside of the normal ASCII range.
+ </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-052"
+ URI="valid/sa/052.xml" SECTIONS="2.2 [2]"
+ OUTPUT="valid/sa/out/052.xml">
+ The document is encoded in UTF-8 and the text inside the
+ root element uses two non-ASCII characters, encoded in UTF-8
+ and each of which expands to a Unicode surrogate pair.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-053"
+ URI="valid/sa/053.xml" SECTIONS="4.4.2"
+ OUTPUT="valid/sa/out/053.xml">
+ Tests inclusion of a well-formed internal entity, which
+ holds an element required by the content model.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-054"
+ URI="valid/sa/054.xml" SECTIONS="3.1 [40] [42]"
+ OUTPUT="valid/sa/out/054.xml">
+ Test demonstrates that extra whitespace within Start-tags and End-tags is normalized into single spaces. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-055"
+ URI="valid/sa/055.xml" SECTIONS="2.6 2.10 [16]"
+ OUTPUT="valid/sa/out/055.xml">
+ Test demonstrates that extra whitespace within a processing instruction will be normalized into a single space character. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-056"
+ URI="valid/sa/056.xml" SECTIONS="3.3.1 4.1 [54] [66]"
+ OUTPUT="valid/sa/out/056.xml">
+ Test demonstrates an Attribute List declaration that uses a StringType as the AttType and also expands the CDATA attribute with a character reference. The test also shows that the leading zeros in the character reference are ignored. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-057"
+ URI="valid/sa/057.xml" SECTIONS="3.2.1 [47]"
+ OUTPUT="valid/sa/out/057.xml">
+ Test demonstrates an element content model whose element can occur zero or more times. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-058"
+ URI="valid/sa/058.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/058.xml">
+ Test demonstrates that extra whitespace be normalized into a single space character in an attribute of type NMTOKENS. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-059"
+ URI="valid/sa/059.xml" SECTIONS="3.2 3.3 [46] [53]"
+ OUTPUT="valid/sa/out/059.xml">
+ Test demonstrates an Element Type Declaration that uses the contentspec of EMPTY. The element cannot have any contents and must always appear as an empty element in the document. The test also shows an Attribute-list declaration with multiple AttDef's. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-060"
+ URI="valid/sa/060.xml" SECTIONS="4.1 [66]"
+ OUTPUT="valid/sa/out/060.xml">
+ Test demonstrates the use of decimal Character References within element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-061"
+ URI="valid/sa/061.xml" SECTIONS="4.1 [66]"
+ OUTPUT="valid/sa/out/061.xml">
+ Test demonstrates the use of decimal Character References within element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-062"
+ URI="valid/sa/062.xml" SECTIONS="4.1 [66]"
+ OUTPUT="valid/sa/out/062.xml">
+ Test demonstrates the use of hexadecimal Character References within element. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-063"
+ URI="valid/sa/063.xml" SECTIONS="2.3 [5]"
+ OUTPUT="valid/sa/out/063.xml">
+ The document is encoded in UTF-8 and the name of the
+ root element type uses non-ASCII characters. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-064"
+ URI="valid/sa/064.xml" SECTIONS="4.1 [66]"
+ OUTPUT="valid/sa/out/064.xml">
+ Tests in-line handling of two legal character references, which
+ each expand to a Unicode surrogate pair.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-065"
+ URI="valid/sa/065.xml" SECTIONS="4.5"
+ OUTPUT="valid/sa/out/065.xml">
+ Tests ability to define an internal entity which can't
+ legally be expanded (contains an unquoted <B><</B>).</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-066"
+ URI="valid/sa/066.xml" SECTIONS="4.1 [66]"
+ OUTPUT="valid/sa/out/066.xml">
+ Expands a CDATA attribute with a character reference.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-067"
+ URI="valid/sa/067.xml" SECTIONS="4.1 [66]"
+ OUTPUT="valid/sa/out/067.xml">
+ Test demonstrates the use of decimal character references within element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-068"
+ URI="valid/sa/068.xml" SECTIONS="2.11, 4.5"
+ OUTPUT="valid/sa/out/068.xml">
+ Tests definition of an internal entity holding a carriage return character
+ reference, which must not be normalized before reporting to the application. Line
+ break normalization only occurs when parsing external parsed entities.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-069"
+ URI="valid/sa/069.xml" SECTIONS="4.7"
+ OUTPUT="valid/sa/out/069.xml">
+ Verifies that an XML parser will parse a NOTATION
+ declaration; the output phase of this test ensures that
+ it's reported to the application. </TEST>
+<TEST TYPE="valid" ENTITIES="parameter" ID="valid-sa-070"
+ URI="valid/sa/070.xml" SECTIONS="4.4.8"
+ OUTPUT="valid/sa/out/070.xml">
+ Verifies that internal parameter entities are correctly
+ expanded within the internal subset.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-071"
+ URI="valid/sa/071.xml" SECTIONS="3.3 3.3.1 [52] [56]"
+ OUTPUT="valid/sa/out/071.xml">
+ Test demonstrates that an AttlistDecl can use ID as the TokenizedType within the Attribute type. The test also shows that IMPLIED is a valid DefaultDecl. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-072"
+ URI="valid/sa/072.xml" SECTIONS="3.3 3.3.1 [52] [56]"
+ OUTPUT="valid/sa/out/072.xml">
+ Test demonstrates that an AttlistDecl can use IDREF as the TokenizedType within the Attribute type. The test also shows that IMPLIED is a valid DefaultDecl. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-073"
+ URI="valid/sa/073.xml" SECTIONS="3.3 3.3.1 [52] [56]"
+ OUTPUT="valid/sa/out/073.xml">
+ Test demonstrates that an AttlistDecl can use IDREFS as the TokenizedType within the Attribute type. The test also shows that IMPLIED is a valid DefaultDecl. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-074"
+ URI="valid/sa/074.xml" SECTIONS="3.3 3.3.1 [52] [56]"
+ OUTPUT="valid/sa/out/074.xml">
+ Test demonstrates that an AttlistDecl can use ENTITY as the TokenizedType within the Attribute type. The test also shows that IMPLIED is a valid DefaultDecl. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-075"
+ URI="valid/sa/075.xml" SECTIONS="3.3 3.3.1 [52] [56]"
+ OUTPUT="valid/sa/out/075.xml">
+ Test demonstrates that an AttlistDecl can use ENTITIES as the TokenizedType within the Attribute type. The test also shows that IMPLIED is a valid DefaultDecl. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-076"
+ URI="valid/sa/076.xml" SECTIONS="3.3.1"
+ OUTPUT="valid/sa/out/076.xml">
+ Verifies that an XML parser will parse a NOTATION
+ attribute; the output phase of this test ensures that
+ both notations are reported to the application. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-077"
+ URI="valid/sa/077.xml" SECTIONS="3.3 3.3.1 [52] [54]"
+ OUTPUT="valid/sa/out/077.xml">
+ Test demonstrates that an AttlistDecl can use an EnumeratedType within the Attribute type. The test also shows that IMPLIED is a valid DefaultDecl. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-078"
+ URI="valid/sa/078.xml" SECTIONS="3.3 3.3.1 [52] [54]"
+ OUTPUT="valid/sa/out/078.xml">
+ Test demonstrates that an AttlistDecl can use an StringType of CDATA within the Attribute type. The test also shows that REQUIRED is a valid DefaultDecl. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-079"
+ URI="valid/sa/079.xml" SECTIONS="3.3 3.3.2 [52] [60]"
+ OUTPUT="valid/sa/out/079.xml">
+ Test demonstrates that an AttlistDecl can use an StringType of CDATA within the Attribute type. The test also shows that FIXED is a valid DefaultDecl and that a value can be given to the attribute in the Start-tag as well as the AttListDecl. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-080"
+ URI="valid/sa/080.xml" SECTIONS="3.3 3.3.2 [52] [60]"
+ OUTPUT="valid/sa/out/080.xml">
+ Test demonstrates that an AttlistDecl can use an StringType of CDATA within the Attribute type. The test also shows that FIXED is a valid DefaultDecl and that an value can be given to the attribute. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-081"
+ URI="valid/sa/081.xml" SECTIONS="3.2.1 [50]"
+ OUTPUT="valid/sa/out/081.xml">
+ Test demonstrates the use of the optional character following a name or list to govern the number of times an element or content particles in the list occur. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-082"
+ URI="valid/sa/082.xml" SECTIONS="4.2 [72]"
+ OUTPUT="valid/sa/out/082.xml">
+ Tests that an external PE may be defined (but not referenced).</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-083"
+ URI="valid/sa/083.xml" SECTIONS="4.2 [72]"
+ OUTPUT="valid/sa/out/083.xml">
+ Tests that an external PE may be defined (but not referenced).</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-084"
+ URI="valid/sa/084.xml" SECTIONS="2.10"
+ OUTPUT="valid/sa/out/084.xml">
+ Test demonstrates that although whitespace can be used to set apart markup for greater readability it is not necessary. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-085"
+ URI="valid/sa/085.xml" SECTIONS="4"
+ OUTPUT="valid/sa/out/085.xml">
+ Parameter and General entities use different namespaces,
+ so there can be an entity of each type with a given name.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-086"
+ URI="valid/sa/086.xml" SECTIONS="4.2"
+ OUTPUT="valid/sa/out/086.xml">
+ Tests whether entities may be declared more than once,
+ with the first declaration being the binding one. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-087"
+ URI="valid/sa/087.xml" SECTIONS="4.5"
+ OUTPUT="valid/sa/out/087.xml">
+ Tests whether character references in internal entities are
+ expanded early enough, by relying on correct handling to
+ make the entity be well formed.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-088"
+ URI="valid/sa/088.xml" SECTIONS="4.5"
+ OUTPUT="valid/sa/out/088.xml">
+ Tests whether entity references in internal entities are
+ expanded late enough, by relying on correct handling to
+ make the expanded text be valid. (If it's expanded too
+ early, the entity will parse as an element that's not
+ valid in that context.)</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-089"
+ URI="valid/sa/089.xml" SECTIONS="4.1 [66]"
+ OUTPUT="valid/sa/out/089.xml">
+ Tests entity expansion of three legal character references,
+ which each expand to a Unicode surrogate pair.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-090"
+ URI="valid/sa/090.xml" SECTIONS="3.3.1"
+ OUTPUT="valid/sa/out/090.xml">
+ Verifies that an XML parser will parse a NOTATION
+ attribute; the output phase of this test ensures that
+ the notation is reported to the application. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-091"
+ URI="valid/sa/091.xml" SECTIONS="3.3.1"
+ OUTPUT="valid/sa/out/091.xml">
+ Verifies that an XML parser will parse an ENTITY
+ attribute; the output phase of this test ensures that
+ the notation is reported to the application, and for
+ validating parsers it further tests that the entity
+ is so reported.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-092"
+ URI="valid/sa/092.xml" SECTIONS="2.3 2.10"
+ OUTPUT="valid/sa/out/092.xml">
+ Test demonstrates that extra whitespace is normalized into a single space character. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-093"
+ URI="valid/sa/093.xml" SECTIONS="2.10"
+ OUTPUT="valid/sa/out/093.xml">
+ Test demonstrates that extra whitespace is not intended for inclusion in the delivered version of the document. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-094"
+ OUTPUT="valid/sa/out/094.xml"
+ URI="valid/sa/094.xml" SECTIONS="2.8">
+ Attribute defaults with a DTD have special parsing rules, different
+ from other strings. That means that characters found there may look
+ like an undefined parameter entity reference "within a markup
+ declaration", but they aren't ... so they can't be violating
+ the <EM>PEs in Internal Subset</EM> WFC.
+ </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-095"
+ URI="valid/sa/095.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/095.xml">
+ Basically an output test, this requires extra whitespace
+ to be normalized into a single space character in an
+ attribute of type NMTOKENS.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-096"
+ URI="valid/sa/096.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/096.xml">
+ Test demonstrates that extra whitespace is normalized into a single space character in an attribute of type NMTOKENS. </TEST>
+<TEST TYPE="valid" ENTITIES="parameter" ID="valid-sa-097"
+ URI="valid/sa/097.xml" SECTIONS="3.3"
+ OUTPUT="valid/sa/out/097.xml">
+ Basically an output test, this tests whether an externally
+ defined attribute declaration (with a default) takes proper
+ precedence over a subsequent internal declaration.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-098"
+ URI="valid/sa/098.xml" SECTIONS="2.6 2.10 [16]"
+ OUTPUT="valid/sa/out/098.xml">
+ Test demonstrates that extra whitespace within a processing instruction is converted into a single space character.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-099"
+ URI="valid/sa/099.xml" SECTIONS="4.3.3 [81]"
+ OUTPUT="valid/sa/out/099.xml">
+ Test demonstrates the name of the encoding can be composed of lowercase characters. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-100"
+ URI="valid/sa/100.xml" SECTIONS="2.3 [12]"
+ OUTPUT="valid/sa/out/100.xml">
+ Makes sure that PUBLIC identifiers may have some strange
+ characters. <EM>NOTE: The XML editors have said that the XML
+ specification errata will specify that parameter entity expansion
+ does not occur in PUBLIC identifiers, so that the '%' character
+ will not flag a malformed parameter entity reference.</EM></TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-101"
+ URI="valid/sa/101.xml" SECTIONS="4.5"
+ OUTPUT="valid/sa/out/101.xml">
+ This tests whether entity expansion is (incorrectly) done
+ while processing entity declarations; if it is, the entity
+ value literal will terminate prematurely.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-102"
+ URI="valid/sa/102.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/102.xml">
+ Test demonstrates that a CDATA attribute can pass a double quote as its value. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-103"
+ URI="valid/sa/103.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/103.xml">
+ Test demonstrates that an attribute can pass a less than sign as its value. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-104"
+ URI="valid/sa/104.xml" SECTIONS="3.1 [40]"
+ OUTPUT="valid/sa/out/104.xml">
+ Test demonstrates that extra whitespace within an Attribute of a Start-tag is normalized to a single space character. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-105"
+ URI="valid/sa/105.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/105.xml">
+ Basically an output test, this requires a CDATA attribute
+ with a tab character to be passed through as one space.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-106"
+ URI="valid/sa/106.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/106.xml">
+ Basically an output test, this requires a CDATA attribute
+ with a newline character to be passed through as one space.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-107"
+ URI="valid/sa/107.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/107.xml">
+ Basically an output test, this requires a CDATA attribute
+ with a return character to be passed through as one space.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-108"
+ URI="valid/sa/108.xml" SECTIONS="2.11, 3.3.3"
+ OUTPUT="valid/sa/out/108.xml">
+ This tests normalization of end-of-line characters (CRLF)
+ within entities to LF, primarily as an output test. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-109"
+ URI="valid/sa/109.xml" SECTIONS="2.3 3.1 [10][40][41]"
+ OUTPUT="valid/sa/out/109.xml">
+ Test demonstrates that an attribute can have a null value. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-110"
+ URI="valid/sa/110.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/110.xml">
+ Basically an output test, this requires that a CDATA
+ attribute with a CRLF be normalized to one space.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-111"
+ URI="valid/sa/111.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/111.xml">
+ Character references expanding to spaces don't affect
+ treatment of attributes. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-112"
+ URI="valid/sa/112.xml" SECTIONS="3.2.1 [48][49]"
+ OUTPUT="valid/sa/out/112.xml">
+ Test demonstrates the use of content particles within the element content. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-113"
+ URI="valid/sa/113.xml" SECTIONS="3.3 [52][53]"
+ OUTPUT="valid/sa/out/113.xml">
+ Test demonstrates that it is not an error to have attributes declared for an element not itself declared.</TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-114"
+ URI="valid/sa/114.xml" SECTIONS="2.7 [20]"
+ OUTPUT="valid/sa/out/114.xml">
+ Test demonstrates that all text within a valid CDATA section is considered text and not recognized as markup. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-115"
+ URI="valid/sa/115.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/sa/out/115.xml">
+ Test demonstrates that an entity reference is processed by recursively processing the replacement text of the entity. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-116"
+ URI="valid/sa/116.xml" SECTIONS="2.11"
+ OUTPUT="valid/sa/out/116.xml">
+ Test demonstrates that a line break within CDATA will be normalized. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-117"
+ URI="valid/sa/117.xml" SECTIONS="4.5"
+ OUTPUT="valid/sa/out/117.xml">
+ Test demonstrates that entity expansion is done while processing entity declarations. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-118"
+ URI="valid/sa/118.xml" SECTIONS="4.5"
+ OUTPUT="valid/sa/out/118.xml">
+ Test demonstrates that entity expansion is done while processing entity declarations. </TEST>
+<TEST TYPE="valid" ENTITIES="none" ID="valid-sa-119"
+ URI="valid/sa/119.xml" SECTIONS="2.5"
+ OUTPUT="valid/sa/out/119.xml">
+ Comments may contain any legal XML characters;
+ only the string "--" is disallowed.</TEST>
+
+
+<!-- Start: valid/not-sa -->
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-001"
+ URI="valid/not-sa/001.xml" SECTIONS="4.2.2 [75]"
+ OUTPUT="valid/not-sa/out/001.xml">
+ Test demonstrates the use of an ExternalID within a document type definition. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-002"
+ URI="valid/not-sa/002.xml" SECTIONS="4.2.2 [75]"
+ OUTPUT="valid/not-sa/out/002.xml">
+ Test demonstrates the use of an ExternalID within a document type definition. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-003"
+ URI="valid/not-sa/003.xml" SECTIONS="4.1 [69]"
+ OUTPUT="valid/not-sa/out/003.xml">
+ Test demonstrates the expansion of an external parameter entity that declares an attribute. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-004"
+ URI="valid/not-sa/004.xml" SECTIONS="4.1 [69]"
+ OUTPUT="valid/not-sa/out/004.xml">
+ Expands an external parameter entity in two different ways,
+ with one of them declaring an attribute.</TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-005"
+ URI="valid/not-sa/005.xml" SECTIONS="4.1 [69]"
+ OUTPUT="valid/not-sa/out/005.xml">
+ Test demonstrates the expansion of an external parameter entity that declares an attribute. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-006"
+ URI="valid/not-sa/006.xml" SECTIONS="3.3 [52]"
+ OUTPUT="valid/not-sa/out/006.xml">
+ Test demonstrates that when more than one definition is provided for the same attribute of a given element type only the first declaration is binding. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-007"
+ URI="valid/not-sa/007.xml" SECTIONS="3.3 [52]"
+ OUTPUT="valid/not-sa/out/007.xml">
+ Test demonstrates the use of an Attribute list declaration within an external entity. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-008"
+ URI="valid/not-sa/008.xml" SECTIONS="4.2.2 [75]"
+ OUTPUT="valid/not-sa/out/008.xml">
+ Test demonstrates that an external identifier may include a public identifier. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-009"
+ URI="valid/not-sa/009.xml" SECTIONS="4.2.2 [75]"
+ OUTPUT="valid/not-sa/out/009.xml">
+ Test demonstrates that an external identifier may include a public identifier. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-010"
+ URI="valid/not-sa/010.xml" SECTIONS="3.3 [52]"
+ OUTPUT="valid/not-sa/out/010.xml">
+ Test demonstrates that when more than one definition is provided for the same attribute of a given element type only the first declaration is binding. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-011"
+ URI="valid/not-sa/011.xml" SECTIONS="4.2 4.2.1 [72] [75]"
+ OUTPUT="valid/not-sa/out/011.xml">
+ Test demonstrates a parameter entity declaration whose parameter entity definition is an ExternalID. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-012"
+ URI="valid/not-sa/012.xml" SECTIONS="4.3.1 [77]"
+ OUTPUT="valid/not-sa/out/012.xml">
+ Test demonstrates an external parsed entity that begins with a text declaration. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-013"
+ URI="valid/not-sa/013.xml" SECTIONS="3.4 [62]"
+ OUTPUT="valid/not-sa/out/013.xml">
+ Test demonstrates the use of the conditional section INCLUDE that will include its contents as part of the DTD. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-014"
+ URI="valid/not-sa/014.xml" SECTIONS="3.4 [62]"
+ OUTPUT="valid/not-sa/out/014.xml">
+ Test demonstrates the use of the conditional section INCLUDE that will include its contents as part of the DTD. The keyword is a parameter-entity reference. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-015"
+ URI="valid/not-sa/015.xml" SECTIONS="3.4 [63]"
+ OUTPUT="valid/not-sa/out/015.xml">
+ Test demonstrates the use of the conditional section IGNORE that will ignore its content from being part of the DTD. The keyword is a parameter-entity reference. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-016"
+ URI="valid/not-sa/016.xml" SECTIONS="3.4 [62]"
+ OUTPUT="valid/not-sa/out/016.xml">
+ Test demonstrates the use of the conditional section INCLUDE that will include its contents as part of the DTD. The keyword is a parameter-entity reference.</TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-017"
+ URI="valid/not-sa/017.xml" SECTIONS="4.2 [72]"
+ OUTPUT="valid/not-sa/out/017.xml">
+ Test demonstrates a parameter entity declaration that contains an attribute list declaration. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-018"
+ URI="valid/not-sa/018.xml" SECTIONS="4.2.2 [75]"
+ OUTPUT="valid/not-sa/out/018.xml">
+ Test demonstrates an ExternalID whose contents contain a parameter entity declaration and an attribute list definition. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-019"
+ URI="valid/not-sa/019.xml" SECTIONS="4.4.8"
+ OUTPUT="valid/not-sa/out/019.xml">
+ Test demonstrates that a parameter entity will be expanded with spaces on either side. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-020"
+ URI="valid/not-sa/020.xml" SECTIONS="4.4.8"
+ OUTPUT="valid/not-sa/out/020.xml">
+ Parameter entities expand with spaces on either side.</TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-021"
+ URI="valid/not-sa/021.xml" SECTIONS="4.2 [72]"
+ OUTPUT="valid/not-sa/out/021.xml">
+ Test demonstrates a parameter entity declaration that contains a partial attribute list declaration. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-023"
+ URI="valid/not-sa/023.xml" SECTIONS="2.3 4.1 [10] [69]"
+ OUTPUT="valid/not-sa/out/023.xml">
+ Test demonstrates the use of a parameter entity reference within an attribute list declaration.
+</TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-024"
+ URI="valid/not-sa/024.xml" SECTIONS="2.8, 4.1 [69]"
+ OUTPUT="valid/not-sa/out/024.xml">
+ Constructs an <!ATTLIST...> declaration from several PEs.</TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-025"
+ URI="valid/not-sa/025.xml" SECTIONS="4.2"
+ OUTPUT="valid/not-sa/out/025.xml">
+ Test demonstrates that when more than one definition is provided for the same entity only the first declaration is binding. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-026"
+ URI="valid/not-sa/026.xml" SECTIONS="3.3 [52]"
+ OUTPUT="valid/not-sa/out/026.xml">
+ Test demonstrates that when more than one definition is provided for the same attribute of a given element type only the first declaration is binding. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-027"
+ URI="valid/not-sa/027.xml" SECTIONS="4.1 [69]"
+ OUTPUT="valid/not-sa/out/027.xml">
+ Test demonstrates a parameter entity reference whose value is NULL. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-028"
+ URI="valid/not-sa/028.xml" SECTIONS="3.4 [62]"
+ OUTPUT="valid/not-sa/out/028.xml">
+ Test demonstrates the use of the conditional section INCLUDE that will include its contents. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-029"
+ URI="valid/not-sa/029.xml" SECTIONS="3.4 [62]"
+ OUTPUT="valid/not-sa/out/029.xml">
+ Test demonstrates the use of the conditional section IGNORE that will ignore its content from being used. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-030"
+ URI="valid/not-sa/030.xml" SECTIONS="3.4 [62]"
+ OUTPUT="valid/not-sa/out/030.xml">
+ Test demonstrates the use of the conditional section IGNORE that will ignore its content from being used. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-not-sa-031"
+ URI="valid/not-sa/031.xml" SECTIONS="2.7"
+ OUTPUT="valid/not-sa/out/031.xml">
+ Expands a general entity which contains a CDATA section with
+ what looks like a markup declaration (but is just text since
+ it's in a CDATA section).</TEST>
+
+
+<!-- Start: valid/ext-sa -->
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-001"
+ URI="valid/ext-sa/001.xml" SECTIONS="2.11"
+ OUTPUT="valid/ext-sa/out/001.xml">
+ A combination of carriage return line feed in an external entity must
+ be normalized to a single newline. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-002"
+ URI="valid/ext-sa/002.xml" SECTIONS="2.11"
+ OUTPUT="valid/ext-sa/out/002.xml">
+ A carriage return (also CRLF) in an external entity must
+ be normalized to a single newline. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-003"
+ URI="valid/ext-sa/003.xml" SECTIONS="3.1 4.1 [43] [68]"
+ OUTPUT="valid/ext-sa/out/003.xml">
+ Test demonstrates that the content of an element can be empty. In this case the external entity is an empty file. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-004"
+ URI="valid/ext-sa/004.xml" SECTIONS="2.11"
+ OUTPUT="valid/ext-sa/out/004.xml">
+ A carriage return (also CRLF) in an external entity must
+ be normalized to a single newline. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-005"
+ URI="valid/ext-sa/005.xml" SECTIONS="3.2.1 4.2.2 [48] [75]"
+ OUTPUT="valid/ext-sa/out/005.xml">
+ Test demonstrates the use of optional character and content particles within an element content. The test also shows the use of an external entity. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-006"
+ URI="valid/ext-sa/006.xml" SECTIONS="2.11 3.2.1 3.2.2 4.2.2 [48] [51] [75]"
+ OUTPUT="valid/ext-sa/out/006.xml">
+ Test demonstrates the use of optional character and content particles within mixed element content. The test also shows the use of an external entity and that a carriage return line feed in an external entity must be normalized to a single newline. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-007"
+ URI="valid/ext-sa/007.xml" SECTIONS="4.2.2 4.4.3 [75]"
+ OUTPUT="valid/ext-sa/out/007.xml">
+ Test demonstrates the use of external entity and how replacement
+text is retrieved and processed. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-008"
+ URI="valid/ext-sa/008.xml" SECTIONS="4.2.2 4.3.3. 4.4.3 [75] [80]"
+ OUTPUT="valid/ext-sa/out/008.xml"> Test demonstrates the use of external
+entity and how replacement text is retrieved and processed. Also tests the use of an
+EncodingDecl of UTF-16.</TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-009"
+ URI="valid/ext-sa/009.xml" SECTIONS="2.11"
+ OUTPUT="valid/ext-sa/out/009.xml">
+ A carriage return (also CRLF) in an external entity must
+ be normalized to a single newline. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-011"
+ URI="valid/ext-sa/011.xml" SECTIONS="2.11 4.2.2 [75]"
+ OUTPUT="valid/ext-sa/out/011.xml">
+ Test demonstrates the use of a public identifier with an external entity.
+The test also shows that a carriage return line feed combination in an external
+entity must be normalized to a single newline. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-012"
+ URI="valid/ext-sa/012.xml" SECTIONS="4.2.1 4.2.2"
+ OUTPUT="valid/ext-sa/out/012.xml">
+ Test demonstrates both internal and external entities and that processing of entity references may be required to produce the correct replacement text.</TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-013"
+ URI="valid/ext-sa/013.xml" SECTIONS="3.3.3"
+ OUTPUT="valid/ext-sa/out/013.xml">
+ Test demonstrates that whitespace is handled by adding a single whitespace to the normalized value in the attribute list. </TEST>
+<TEST TYPE="valid" ENTITIES="both" ID="valid-ext-sa-014"
+ URI="valid/ext-sa/014.xml" SECTIONS="4.1 4.4.3 [68]"
+ OUTPUT="valid/ext-sa/out/014.xml">
+ Test demonstrates use of characters outside of normal ASCII range.</TEST>
+</TESTCASES>
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: xml io.encodings.utf8 io.files kernel tools.test ;
+USING: xml xml.data kernel tools.test ;
IN: xml.tests
-[ ] [
- "resource:basis/xmode/xmode.dtd" utf8 <file-reader>
- read-xml-chunk drop
+[ t ] [
+ "resource:basis/xmode/xmode.dtd" file>dtd dtd?
] unit-test
--- /dev/null
+Basic tools for parsing XML
-! Copyright (C) 2005, 2006 Daniel Ehrenberg
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays ascii assocs combinators
-combinators.short-circuit fry io.encodings io.encodings.iana
-io.encodings.string io.encodings.utf16 io.encodings.utf8 kernel make
-math math.parser namespaces sequences sets splitting state-parser
-strings xml.char-classes xml.data xml.entities xml.errors hashtables ;
+USING: namespaces xml.state kernel sequences accessors
+xml.char-classes xml.errors math io sbufs fry strings ascii
+circular xml.entities assocs make splitting math.parser
+locals combinators arrays ;
IN: xml.tokenize
-! XML namespace processing: ns = namespace
-
-! A stack of hashtables
-SYMBOL: ns-stack
-
-: attrs>ns ( attrs-alist -- hash )
- ! this should check to make sure URIs are valid
- [
- [
- swap dup space>> "xmlns" =
- [ main>> set ]
- [
- T{ name f "" "xmlns" f } names-match?
- [ "" set ] [ drop ] if
- ] if
- ] assoc-each
- ] { } make-assoc f like ;
-
-: add-ns ( name -- )
- dup space>> dup ns-stack get assoc-stack
- [ nip ] [ nonexist-ns ] if* >>url drop ;
-
-: push-ns ( hash -- )
- ns-stack get push ;
-
-: pop-ns ( -- )
- ns-stack get pop* ;
-
-: init-ns-stack ( -- )
- V{ H{
- { "xml" "http://www.w3.org/XML/1998/namespace" }
- { "xmlns" "http://www.w3.org/2000/xmlns" }
- { "" "" }
- } } clone
- ns-stack set ;
-
-: tag-ns ( name attrs-alist -- name attrs )
- dup attrs>ns push-ns
- [ dup add-ns ] dip dup [ drop add-ns ] assoc-each <attrs> ;
-
-! Parsing names
-
: version=1.0? ( -- ? )
- prolog-data get version>> "1.0" = ;
-
-! version=1.0? is calculated once and passed around for efficiency
-
-: assure-name ( str version=1.0? -- str )
- over {
- [ first name-start? ]
- [ rest-slice [ name-char? ] with all? ]
- } 2&& [ bad-name ] unless ;
-
-: (parse-name) ( start -- str )
- version=1.0?
- [ [ get-char name-char? not ] curry take-until append ]
- [ assure-name ] bi ;
-
-: parse-name-starting ( start -- name )
- (parse-name) get-char CHAR: : =
- [ next "" (parse-name) ] [ "" swap ] if f <name> ;
+ prolog-data get [ version>> "1.0" = ] [ t ] if* ;
-: parse-name ( -- name )
- "" parse-name-starting ;
-
-! -- Parsing strings
+: assure-good-char ( ch -- ch )
+ [
+ version=1.0? over text? not get-check and
+ [ disallowed-char ] when
+ ] [ f ] if* ;
+
+! * Basic utility words
+
+: record ( char -- )
+ CHAR: \n =
+ [ 0 get-line 1+ set-line ] [ get-column 1+ ] if
+ set-column ;
+
+! (next) normalizes \r\n and \r
+: (next) ( -- char )
+ get-next read1
+ 2dup swap CHAR: \r = [
+ CHAR: \n =
+ [ nip read1 ] [ nip CHAR: \n swap ] if
+ ] [ drop ] if
+ set-next dup set-char assure-good-char ;
+
+: next ( -- )
+ #! Increment spot.
+ get-char [ unexpected-end ] unless (next) record ;
+
+: init-parser ( -- )
+ 0 1 0 f f <spot> spot set
+ read1 set-next next ;
+
+: with-state ( stream quot -- )
+ ! with-input-stream implicitly creates a new scope which we use
+ swap [ init-parser call ] with-input-stream ; inline
+
+: skip-until ( quot: ( -- ? ) -- )
+ get-char [
+ [ call ] keep swap [ drop ] [
+ next skip-until
+ ] if
+ ] [ drop ] if ; inline recursive
+
+: take-until ( quot -- string )
+ #! Take the substring of a string starting at spot
+ #! from code until the quotation given is true and
+ #! advance spot to after the substring.
+ 10 <sbuf> [
+ '[ @ [ t ] [ get-char _ push f ] if ] skip-until
+ ] keep >string ; inline
+
+: take-to ( seq -- string )
+ '[ get-char _ member? ] take-until ;
+
+: pass-blank ( -- )
+ #! Advance code past any whitespace, including newlines
+ [ get-char blank? not ] skip-until ;
+
+: string-matches? ( string circular -- ? )
+ get-char over push-circular
+ sequence= ;
+
+: take-string ( match -- string )
+ dup length <circular-string>
+ [ 2dup string-matches? ] take-until nip
+ dup length rot length 1- - head
+ get-char [ missing-close ] unless next ;
+
+: expect ( string -- )
+ dup [ get-char next ] replicate 2dup =
+ [ 2drop ] [ expected ] if ;
+
+! Suddenly XML-specific
: parse-named-entity ( string -- )
- dup entities at [ , ] [
+ dup entities at [ , ] [
dup extra-entities get at
- [ dup number? [ , ] [ % ] if ] [ no-entity ] ?if ! Make less hackish
+ [ % ] [ no-entity ] ?if
] ?if ;
+: take-; ( -- string )
+ next ";" take-to next ;
+
: parse-entity ( -- )
- next CHAR: ; take-char next
- "#" ?head [
+ take-; "#" ?head [
"x" ?head 16 10 ? base> ,
] [ parse-named-entity ] if ;
-: (parse-char) ( ch -- )
- get-char {
- { [ dup not ] [ 2drop ] }
- { [ 2dup = ] [ 2drop next ] }
- { [ dup CHAR: & = ] [ drop parse-entity (parse-char) ] }
- [ , next (parse-char) ]
- } cond ;
-
-: parse-char ( ch -- string )
- [ (parse-char) ] "" make ;
-
-: parse-text ( -- string )
- CHAR: < parse-char ;
-
-! Parsing tags
-
-: start-tag ( -- name ? )
- #! Outputs the name and whether this is a closing tag
- get-char CHAR: / = dup [ next ] when
- parse-name swap ;
-
-: (parse-quote) ( ch -- string )
- parse-char get-char
- [ unclosed-quote ] unless ;
-
-: parse-quote ( -- seq )
- pass-blank get-char dup "'\"" member?
- [ next (parse-quote) ] [ quoteless-attr ] if ;
-
-: parse-attr ( -- )
- parse-name
- pass-blank CHAR: = expect
- parse-quote
- 2array , ;
-
-: (middle-tag) ( -- )
- pass-blank version=1.0? get-char name-start?
- [ parse-attr (middle-tag) ] when ;
-
-: middle-tag ( -- attrs-alist )
- ! f make will make a vector if it has any elements
- [ (middle-tag) ] f make pass-blank ;
-
-: end-tag ( name attrs-alist -- tag )
- tag-ns pass-blank get-char CHAR: / =
- [ pop-ns <contained> next ] [ <opener> ] if ;
-
-: take-comment ( -- comment )
- "--" expect-string
- "--" take-string
- <comment>
- CHAR: > expect ;
-
-: take-cdata ( -- string )
- "[CDATA[" expect-string "]]>" take-string ;
-
-: take-element-decl ( -- element-decl )
- pass-blank " " take-string pass-blank ">" take-string <element-decl> ;
-
-: take-attlist-decl ( -- doctype-decl )
- pass-blank " " take-string pass-blank ">" take-string <attlist-decl> ;
-
-: take-until-one-of ( seps -- str sep )
- '[ get-char _ member? ] take-until get-char ;
-
-: only-blanks ( str -- )
- [ blank? ] all? [ bad-doctype-decl ] unless ;
-
-: take-system-literal ( -- str ) ! replace with parse-quote?
- pass-blank get-char next {
- { CHAR: ' [ "'" take-string ] }
- { CHAR: " [ "\"" take-string ] }
- } case ;
+: parse-pe ( -- )
+ take-; dup pe-table get at
+ [ % ] [ no-entity ] ?if ;
-: take-system-id ( -- system-id )
- take-system-literal <system-id>
- ">" take-string only-blanks ;
-
-: take-public-id ( -- public-id )
- take-system-literal
- take-system-literal <public-id>
- ">" take-string only-blanks ;
-
-DEFER: direct
-
-: (take-internal-subset) ( -- )
- pass-blank get-char {
- { CHAR: ] [ next ] }
- [ drop "<!" expect-string direct , (take-internal-subset) ]
- } case ;
-
-: take-internal-subset ( -- seq )
- [ (take-internal-subset) ] { } make ;
-
-: (take-external-id) ( token -- external-id )
- pass-blank {
- { "SYSTEM" [ take-system-id ] }
- { "PUBLIC" [ take-public-id ] }
- [ bad-external-id ]
- } case ;
-
-: take-external-id ( -- external-id )
- " " take-string (take-external-id) ;
-
-: take-doctype-decl ( -- doctype-decl )
- pass-blank " >" take-until-one-of {
- { CHAR: \s [
- pass-blank get-char CHAR: [ = [
- next take-internal-subset f swap
- ">" take-string only-blanks
- ] [
- " >" take-until-one-of {
- { CHAR: \s [ (take-external-id) ] }
- { CHAR: > [ only-blanks f ] }
- } case f
- ] if
- ] }
- { CHAR: > [ f f ] }
- } case <doctype-decl> ;
-
-: take-entity-def ( -- entity-name entity-def )
- " " take-string pass-blank get-char {
- { CHAR: ' [ parse-quote ] }
- { CHAR: " [ parse-quote ] }
- [ drop take-external-id ]
- } case ;
-
-: associate-entity ( entity-name entity-def -- )
- swap extra-entities [ ?set-at ] change ;
-
-: take-entity-decl ( -- entity-decl )
- pass-blank get-char {
- { CHAR: % [ next pass-blank take-entity-def ] }
- [ drop take-entity-def 2dup associate-entity ]
- } case
- ">" take-string only-blanks <entity-decl> ;
-
-: take-directive ( -- directive )
- " " take-string {
- { "ELEMENT" [ take-element-decl ] }
- { "ATTLIST" [ take-attlist-decl ] }
- { "DOCTYPE" [ take-doctype-decl ] }
- { "ENTITY" [ take-entity-decl ] }
- [ bad-directive ]
- } case ;
-
-: direct ( -- object )
- get-char {
- { CHAR: - [ take-comment ] }
- { CHAR: [ [ take-cdata ] }
- [ drop take-directive ]
- } case ;
-
-: yes/no>bool ( string -- t/f )
- {
- { "yes" [ t ] }
- { "no" [ f ] }
- [ not-yes/no ]
- } case ;
-
-: assure-no-extra ( seq -- )
- [ first ] map {
- T{ name f "" "version" f }
- T{ name f "" "encoding" f }
- T{ name f "" "standalone" f }
- } diff
- [ extra-attrs ] unless-empty ;
-
-: good-version ( version -- version )
- dup { "1.0" "1.1" } member? [ bad-version ] unless ;
-
-: prolog-version ( alist -- version )
- T{ name f "" "version" f } swap at
- [ good-version ] [ versionless-prolog ] if* ;
-
-: prolog-encoding ( alist -- encoding )
- T{ name f "" "encoding" f } swap at "UTF-8" or ;
-
-: prolog-standalone ( alist -- version )
- T{ name f "" "standalone" f } swap at
- [ yes/no>bool ] [ f ] if* ;
-
-: prolog-attrs ( alist -- prolog )
- [ prolog-version ]
- [ prolog-encoding ]
- [ prolog-standalone ]
- tri <prolog> ;
-
-SYMBOL: string-input?
-: decode-input-if ( encoding -- )
- string-input? get [ drop ] [ decode-input ] if ;
-
-: parse-prolog ( -- prolog )
- pass-blank middle-tag "?>" expect-string
- dup assure-no-extra prolog-attrs
- dup encoding>> dup "UTF-16" =
- [ drop ] [ name>encoding [ decode-input-if ] when* ] if
- dup prolog-data set ;
-
-: instruct ( -- instruction )
- "" (parse-name) dup "xml" =
- [ drop parse-prolog ] [
- dup >lower "xml" =
- [ capitalized-prolog ]
- [ "?>" take-string append <instruction> ] if
- ] if ;
-
-: make-tag ( -- tag )
+:: (parse-char) ( quot: ( ch -- ? ) -- )
+ get-char :> char
{
- { [ get-char dup CHAR: ! = ] [ drop next direct ] }
- { [ CHAR: ? = ] [ next instruct ] }
- [
- start-tag [ dup add-ns pop-ns <closer> ]
- [ middle-tag end-tag ] if
- CHAR: > expect
- ]
- } cond ;
-
-! Autodetecting encodings
-
-: continue-make-tag ( str -- tag )
- parse-name-starting middle-tag end-tag CHAR: > expect ;
-
-: start-utf16le ( -- tag )
- utf16le decode-input-if
- CHAR: ? expect
- 0 expect instruct ;
-
-: 10xxxxxx? ( ch -- ? )
- -6 shift 3 bitand 2 = ;
-
-: start<name ( ch -- tag )
- ascii?
- [ utf8 decode-input-if next make-tag ] [
- next
- [ get-next 10xxxxxx? not ] take-until
- get-char suffix utf8 decode
- utf8 decode-input-if next
- continue-make-tag
- ] if ;
-
-: start< ( -- tag )
- get-next {
- { 0 [ next next start-utf16le ] }
- { CHAR: ? [ next next instruct ] } ! XML prolog parsing sets the encoding
- { CHAR: ! [ utf8 decode-input next next direct ] }
- [ start<name ]
- } case ;
-
-: skip-utf8-bom ( -- tag )
- "\u0000bb\u0000bf" expect utf8 decode-input
- CHAR: < expect make-tag ;
-
-: decode-expecting ( encoding string -- tag )
- [ decode-input-if next ] [ expect-string ] bi* make-tag ;
-
-: start-utf16be ( -- tag )
- utf16be "<" decode-expecting ;
-
-: skip-utf16le-bom ( -- tag )
- utf16le "\u0000fe<" decode-expecting ;
+ { [ char not ] [ ] }
+ { [ char quot call ] [ next ] }
+ { [ char CHAR: & = ] [ parse-entity quot (parse-char) ] }
+ { [ in-dtd? get char CHAR: % = and ] [ parse-pe quot (parse-char) ] }
+ [ char , next quot (parse-char) ]
+ } cond ; inline recursive
+
+: parse-char ( quot: ( ch -- ? ) -- seq )
+ [ (parse-char) ] "" make ; inline
+
+: assure-no-]]> ( circular -- )
+ "]]>" sequence= [ text-w/]]> ] when ;
+
+:: parse-text ( -- string )
+ 3 f <array> <circular> :> circ
+ depth get zero? :> no-text [| char |
+ char circ push-circular
+ circ assure-no-]]>
+ no-text [ char blank? char CHAR: < = or [
+ char 1string t pre/post-content
+ ] unless ] when
+ char CHAR: < =
+ ] parse-char ;
+
+: close ( -- )
+ pass-blank ">" expect ;
+
+: normalize-quote ( str -- str )
+ [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;
+
+: (parse-quote) ( <-disallowed? ch -- string )
+ swap '[
+ dup _ = [ drop t ]
+ [ CHAR: < = _ and [ attr-w/< ] [ f ] if ] if
+ ] parse-char normalize-quote get-char
+ [ unclosed-quote ] unless ; inline
+
+: parse-quote* ( <-disallowed? -- seq )
+ pass-blank get-char dup "'\"" member?
+ [ next (parse-quote) ] [ quoteless-attr ] if ; inline
-: skip-utf16be-bom ( -- tag )
- utf16be "\u0000ff<" decode-expecting ;
+: parse-quote ( -- seq )
+ f parse-quote* ;
-: start-document ( -- tag )
- get-char {
- { CHAR: < [ start< ] }
- { 0 [ start-utf16be ] }
- { HEX: EF [ skip-utf8-bom ] }
- { HEX: FF [ skip-utf16le-bom ] }
- { HEX: FE [ skip-utf16be-bom ] }
- { f [ "" ] }
- [ drop utf8 decode-input-if f ]
- ! Same problem as with <e`>, in the case of XML chunks?
- } case ;
--- /dev/null
+Utilities for manipulating an XML DOM tree
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.markup help.syntax xml.data sequences strings ;
+IN: xml.utilities
+
+ABOUT: "xml.utilities"
+
+ARTICLE: "xml.utilities" "Utilities for processing XML"
+ "Getting parts of an XML document or tag:"
+ $nl
+ "Note: the difference between deep-tag-named and tag-named is that the former searches recursively among all children and children of children of the tag, while the latter only looks at the direct children, and is therefore more efficient."
+ { $subsection tag-named }
+ { $subsection tags-named }
+ { $subsection deep-tag-named }
+ { $subsection deep-tags-named }
+ { $subsection get-id }
+ "To get at the contents of a single tag, use"
+ { $subsection children>string }
+ { $subsection children-tags }
+ { $subsection first-child-tag }
+ { $subsection assert-tag } ;
+
+HELP: deep-tag-named
+{ $values { "tag" "an XML tag or document" } { "name/string" "an XML name or string representing a name" } { "matching-tag" tag } }
+{ $description "Finds an XML tag with a matching name, recursively searching children and children of children." }
+{ $see-also tags-named tag-named deep-tags-named } ;
+
+HELP: deep-tags-named
+{ $values { "tag" "an XML tag or document" } { "name/string" "an XML name or string representing a name" } { "tags-seq" "a sequence of tags" } }
+{ $description "Returns a sequence of all tags of a matching name, recursively searching children and children of children." }
+{ $see-also tag-named deep-tag-named tags-named } ;
+
+HELP: children>string
+{ $values { "tag" "an XML tag or document" } { "string" "a string" } }
+{ $description "Concatenates the children of the tag, throwing an exception when there is a non-string child." } ;
+
+HELP: children-tags
+{ $values { "tag" "an XML tag or document" } { "sequence" sequence } }
+{ $description "Gets the children of the tag that are themselves tags." }
+{ $see-also first-child-tag } ;
+
+HELP: first-child-tag
+{ $values { "tag" "an XML tag or document" } { "tag" tag } }
+{ $description "Returns the first child of the given tag that is a tag." }
+{ $see-also children-tags } ;
+
+HELP: tag-named
+{ $values { "tag" "an XML tag or document" }
+ { "name/string" "an XML name or string representing the name" }
+ { "matching-tag" tag } }
+{ $description "Finds the first tag with matching name which is the direct child of the given tag." }
+{ $see-also deep-tags-named deep-tag-named tags-named } ;
+
+HELP: tags-named
+{ $values { "tag" "an XML tag or document" }
+ { "name/string" "an XML name or string representing the name" }
+ { "tags-seq" "a sequence of tags" } }
+{ $description "Finds all tags with matching name that are the direct children of the given tag." }
+{ $see-also deep-tag-named deep-tags-named tag-named } ;
+
+HELP: get-id
+{ $values { "tag" "an XML tag or document" } { "id" "a string" } { "elem" "an XML element or f" } }
+{ $description "Finds the XML tag with the specified id, ignoring the namespace." } ;
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: xml xml.utilities tools.test xml.data ;
IN: xml.utilities.tests
-USING: xml xml.utilities tools.test ;
[ "bar" ] [ "<foo>bar</foo>" string>xml children>string ] unit-test
[ "" ] [ "<foo></foo>" string>xml children>string ] unit-test
[ "" ] [ "<foo/>" string>xml children>string ] unit-test
+
+XML-NS: foo http://blah.com
+
+[ T{ name { main "bling" } { url "http://blah.com" } } ] [ "bling" foo ] unit-test
-! Copyright (C) 2005, 2006 Daniel Ehrenberg
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel namespaces sequences words io assocs
quotations strings parser lexer arrays xml.data xml.writer debugger
-splitting vectors sequences.deep combinators fry ;
+splitting vectors sequences.deep combinators fry memoize ;
IN: xml.utilities
-! * System for words specialized on tag names
-
-TUPLE: process-missing process tag ;
-M: process-missing error.
- "Tag <" write
- dup tag>> print-name
- "> not implemented on process process " write
- name>> print ;
-
-: run-process ( tag word -- )
- 2dup "xtable" word-prop
- [ dup main>> ] dip at* [ 2nip call ] [
- drop \ process-missing boa throw
- ] if ;
-
-: PROCESS:
- CREATE
- dup H{ } clone "xtable" set-word-prop
- dup '[ _ run-process ] define ; parsing
-
-: TAG:
- scan scan-word
- parse-definition
- swap "xtable" word-prop
- rot "/" split [ [ 2dup ] dip swap set-at ] each 2drop ;
- parsing
-
-
-! * Common utility functions
-
-: build-tag* ( items name -- tag )
- assure-name swap f swap <tag> ;
-
-: build-tag ( item name -- tag )
- [ 1array ] dip build-tag* ;
-
-: standard-prolog ( -- prolog )
- T{ prolog f "1.0" "UTF-8" f } ;
-
-: build-xml ( tag -- xml )
- standard-prolog { } rot { } <xml> ;
-
: children>string ( tag -- string )
children>> {
{ [ dup empty? ] [ drop "" ] }
: insert-child ( child tag -- )
[ 1vector ] dip insert-children ;
+
+: XML-NS:
+ CREATE-WORD (( string -- name )) over set-stack-effect
+ scan '[ f swap _ <name> ] define-memoized ; parsing
--- /dev/null
+Tools for printing XML, including prettyprinting
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.syntax help.markup io strings ;
+IN: xml.writer
+
+ABOUT: "xml.writer"
+
+ARTICLE: "xml.writer" "Writing XML"
+ "These words are used in implementing prettyprint"
+ { $subsection write-xml-chunk }
+ "These words are used to print XML normally"
+ { $subsection xml>string }
+ { $subsection write-xml }
+ "These words are used to prettyprint XML"
+ { $subsection pprint-xml>string }
+ { $subsection pprint-xml>string-but }
+ { $subsection pprint-xml }
+ { $subsection pprint-xml-but } ;
+
+HELP: write-xml-chunk
+{ $values { "object" "an XML element" } }
+{ $description "writes an XML element to " { $link output-stream } "." }
+{ $see-also write-xml-chunk write-xml } ;
+
+HELP: xml>string
+{ $values { "xml" "an xml document" } { "string" "a string" } }
+{ $description "converts an XML document into a string" }
+{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
+
+HELP: pprint-xml>string
+{ $values { "xml" "an xml document" } { "string" "a string" } }
+{ $description "converts an XML document into a string in a prettyprinted form." }
+{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
+
+HELP: write-xml
+{ $values { "xml" "an XML document" } }
+{ $description "prints the contents of an XML document to " { $link output-stream } "." }
+{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
+
+HELP: pprint-xml
+{ $values { "xml" "an XML document" } }
+{ $description "prints the contents of an XML document to " { $link output-stream } " in a prettyprinted form." }
+{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
+
+HELP: pprint-xml-but
+{ $values { "xml" "an XML document" } { "sensitive-tags" "a sequence of names" } }
+{ $description "Prettyprints an XML document, leaving the whitespace of the tags with names in sensitive-tags intact." }
+{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
+
+HELP: pprint-xml>string-but
+{ $values { "xml" "an XML document" } { "sensitive-tags" "a sequence of names" } { "string" string } }
+{ $description "Prettyprints an XML document, returning the result as a string and leaving the whitespace of the tags with names in sensitive-tags intact." }
+{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
+
+{ xml>string write-xml pprint-xml pprint-xml>string pprint-xml>string-but pprint-xml-but } related-words
+
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: xml.data xml.writer tools.test fry xml kernel multiline
+xml.writer.private io.streams.string xml.utilities sequences ;
IN: xml.writer.tests
-USING: xml.data xml.writer tools.test ;
+
+\ write-xml must-infer
+\ xml>string must-infer
+\ pprint-xml must-infer
+\ pprint-xml-but must-infer
[ "foo" ] [ T{ name { main "foo" } } name>string ] unit-test
+[ "foo" ] [ T{ name { space "" } { main "foo" } } name>string ] unit-test
[ "ns:foo" ] [ T{ name { space "ns" } { main "foo" } } name>string ] unit-test
+
+: reprints-as ( to from -- )
+ [ '[ _ ] ] [ '[ _ string>xml xml>string ] ] bi* unit-test ;
+
+: pprint-reprints-as ( to from -- )
+ [ '[ _ ] ] [ '[ _ string>xml pprint-xml>string ] ] bi* unit-test ;
+
+: reprints-same ( string -- ) dup reprints-as ;
+
+"<?xml version=\"1.0\" encoding=\"UTF-8\"?><x/>" reprints-same
+
+{" <?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE foo [<!ENTITY foo "bar">]>
+<x>bar</x> "}
+{" <?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE foo [<!ENTITY foo 'bar'>]>
+<x>&foo;</x> "} reprints-as
+
+{" <?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE foo [
+ <!ENTITY foo "bar">
+ <!ELEMENT br EMPTY>
+ <!ATTLIST list type (bullets|ordered|glossary) "ordered">
+ <!NOTATION foo bar>
+ <?baz bing bang bong?>
+ <!--wtf-->
+]>
+<x>
+ bar
+</x>"}
+{" <?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE foo [ <!ENTITY foo 'bar'> <!ELEMENT br EMPTY>
+<!ATTLIST list
+ type (bullets|ordered|glossary) "ordered">
+<!NOTATION foo bar> <?baz bing bang bong?>
+ <!--wtf-->
+]>
+<x>&foo;</x>"} pprint-reprints-as
+
+[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk xml-chunk>string = ] unit-test
+[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
+[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
+ [ "<a b='c'/>" string>xml xml>string ] unit-test
+[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><foo>bar baz</foo>" ]
+[ "<foo>bar</foo>" string>xml [ " baz" append ] map xml>string ] unit-test
+[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<foo>\n bar\n</foo>" ]
+[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test
+[ "<foo'>" ] [ "<foo'>" <unescaped> xml-chunk>string ] unit-test
-! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
+! Copyright (C) 2005, 2009 Daniel Ehrenberg\r
! See http://factorcode.org/license.txt for BSD license.\r
USING: hashtables kernel math namespaces sequences strings\r
assocs combinators io io.streams.string accessors\r
SYMBOL: indenter\r
" " indenter set-global\r
\r
+<PRIVATE\r
+\r
: sensitive? ( tag -- ? )\r
sensitive-tags get swap '[ _ names-match? ] contains? ;\r
\r
[ [ empty? ] [ string? ] bi and not ] filter\r
] when ;\r
\r
+PRIVATE>\r
+\r
: name>string ( name -- string )\r
[ main>> ] [ space>> ] bi [ ":" rot 3append ] unless-empty ;\r
\r
: print-name ( name -- )\r
name>string write ;\r
\r
+<PRIVATE\r
+\r
: print-attrs ( assoc -- )\r
[\r
" " write\r
"\"" write\r
] assoc-each ;\r
\r
+PRIVATE>\r
+\r
GENERIC: write-xml-chunk ( object -- )\r
\r
+<PRIVATE\r
+\r
M: string write-xml-chunk\r
- escape-string dup empty? not xml-pprint? get and\r
- [ nl 80 indent-string indented-break ] when write ;\r
+ escape-string xml-pprint? get [\r
+ dup [ blank? ] all?\r
+ [ drop "" ]\r
+ [ nl 80 indent-string indented-break ] if\r
+ ] when write ;\r
\r
: write-tag ( tag -- )\r
?indent CHAR: < write1\r
} cleave\r
] dip xml-pprint? set ;\r
\r
+M: unescaped write-xml-chunk\r
+ string>> write ;\r
+\r
M: comment write-xml-chunk\r
"<!--" write text>> write "-->" write ;\r
\r
[ att-defs>> write ">" write ]\r
bi ;\r
\r
-M: entity-decl write-xml-chunk\r
- "<!ENTITY " write\r
+M: notation-decl write-xml-chunk\r
+ "<!NOTATION " write\r
[ name>> write " " write ]\r
- [ def>> write-xml-chunk ">" write ]\r
+ [ id>> write ">" write ]\r
bi ;\r
\r
+M: entity-decl write-xml-chunk\r
+ "<!ENTITY " write\r
+ [ pe?>> [ " % " write ] when ]\r
+ [ name>> write " \"" write ] [\r
+ def>> f xml-pprint?\r
+ [ write-xml-chunk ] with-variable\r
+ "\">" write\r
+ ] tri ;\r
+\r
M: system-id write-xml-chunk\r
"SYSTEM '" write system-literal>> write "'" write ;\r
\r
[ pubid-literal>> write "' '" write ]\r
[ system-literal>> write "'" write ] bi ;\r
\r
+: write-internal-subset ( dtd -- )\r
+ [\r
+ "[" write indent\r
+ directives>> [ ?indent write-xml-chunk ] each\r
+ unindent ?indent "]" write\r
+ ] when* ;\r
+\r
M: doctype-decl write-xml-chunk\r
- "<!DOCTYPE " write\r
+ ?indent "<!DOCTYPE " write\r
[ name>> write " " write ]\r
[ external-id>> [ write-xml-chunk " " write ] when* ]\r
- [\r
- internal-subset>>\r
- [ "[" write [ write-xml-chunk ] each "]" write ] when* ">" write\r
- ] tri ;\r
+ [ internal-subset>> write-internal-subset ">" write ] tri ;\r
\r
M: directive write-xml-chunk\r
- "<!" write text>> write CHAR: > write1 ;\r
+ "<!" write text>> write CHAR: > write1 nl ;\r
\r
M: instruction write-xml-chunk\r
"<?" write text>> write "?>" write ;\r
\r
+M: number write-xml-chunk\r
+ "Numbers are not allowed in XML" throw ;\r
+\r
M: sequence write-xml-chunk\r
[ write-xml-chunk ] each ;\r
\r
+PRIVATE>\r
+\r
: write-prolog ( xml -- )\r
"<?xml version=\"" write dup version>> write\r
"\" encoding=\"" write dup encoding>> write\r
M: xml write-xml-chunk\r
body>> write-xml-chunk ;\r
\r
-: print-xml ( xml -- )\r
- write-xml nl ;\r
-\r
: xml>string ( xml -- string )\r
[ write-xml ] with-string-writer ;\r
\r
-: with-xml-pprint ( sensitive-tags quot -- )\r
+: xml-chunk>string ( object -- string )\r
+ [ write-xml-chunk ] with-string-writer ;\r
+\r
+: pprint-xml-but ( xml sensitive-tags -- )\r
[\r
- swap [ assure-name ] map sensitive-tags set\r
+ [ assure-name ] map sensitive-tags set\r
0 indentation set\r
xml-pprint? on\r
- call\r
- ] with-scope ; inline\r
-\r
-: pprint-xml-but ( xml sensitive-tags -- )\r
- [ print-xml ] with-xml-pprint ;\r
+ write-xml\r
+ ] with-scope ;\r
\r
: pprint-xml ( xml -- )\r
f pprint-xml-but ;\r
\r
: pprint-xml>string-but ( xml sensitive-tags -- string )\r
- [ xml>string ] with-xml-pprint ;\r
+ [ pprint-xml-but ] with-string-writer ;\r
\r
: pprint-xml>string ( xml -- string )\r
f pprint-xml>string-but ;\r
-! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
+! Copyright (C) 2005, 2009 Daniel Ehrenberg\r
! See http://factorcode.org/license.txt for BSD license.\r
-USING: help.markup help.syntax kernel xml.data xml.errors\r
-xml.writer state-parser xml.tokenize xml.utilities xml.entities\r
-strings sequences io xml.entities.html ;\r
+USING: help.markup help.syntax xml.data io strings ;\r
IN: xml\r
\r
HELP: string>xml\r
-{ $values { "string" "a string" } { "xml" "an xml document" } }\r
-{ $description "converts a string into an " { $link xml }\r
- " datatype for further processing" } ;\r
+{ $values { "string" string } { "xml" xml } }\r
+{ $description "Converts a string into an " { $link xml }\r
+ " tree for further processing." } ;\r
\r
HELP: read-xml\r
-{ $values { "stream" "a stream that supports readln" }\r
- { "xml" "an XML document" } }\r
-{ $description "exausts the given stream, reading an XML document from it" } ;\r
+{ $values { "stream" "an input stream" } { "xml" xml } }\r
+{ $description "Exhausts the given stream, reading an XML document from it. A binary stream (one without an encoding) should be used as input; the encoding is automatically detected." } ;
\r
HELP: file>xml\r
-{ $values { "filename" "a string representing a filename" }\r
- { "xml" "an XML document" } }\r
-{ $description "opens the given file, reads it in as XML, closes the file and returns the corresponding XML tree" } ;\r
+{ $values { "filename" string } { "xml" xml } }\r
+{ $description "Opens the given file, reads it in as XML, closes the file and returns the corresponding XML tree. The encoding is automatically detected." } ;\r
\r
{ string>xml read-xml file>xml } related-words\r
\r
-HELP: xml>string\r
-{ $values { "xml" "an xml document" } { "string" "a string" } }\r
-{ $description "converts an xml document (" { $link xml } ") into a string" }\r
-{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
-\r
-HELP: pprint-xml>string\r
-{ $values { "xml" "an xml document" } { "string" "a string" } }\r
-{ $description "converts an xml document (" { $link xml } ") into a string in a prettyprinted form." }\r
-{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
-\r
-HELP: xml-parse-error\r
-{ $class-description "the exception class that all parsing errors in XML documents are in." } ;\r
-\r
-HELP: xml-reprint\r
-{ $values { "string" "a string of XML" } }\r
-{ $description "parses XML and prints it out again, for testing purposes" }\r
-{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
-\r
-HELP: write-xml\r
-{ $values { "xml" "an XML document" } }\r
-{ $description "prints the contents of an XML document (" { $link xml } ") to " { $link output-stream } "." }\r
-{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
-\r
-HELP: print-xml\r
-{ $values { "xml" "an XML document" } }\r
-{ $description "prints the contents of an XML document (" { $link xml } ") to " { $link output-stream } ", followed by a newline" }\r
-{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
-\r
-HELP: pprint-xml\r
-{ $values { "xml" "an XML document" } }\r
-{ $description "prints the contents of an XML document (" { $link xml } ") to " { $link output-stream } " in a prettyprinted form." }\r
-{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
-\r
-HELP: pprint-xml-but\r
-{ $values { "xml" "an XML document" } { "sensitive-tags" "a sequence of names" } }\r
-{ $description "Prettyprints an XML document, leaving the whitespace of the tags with names in sensitive-tags intact." }\r
-{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
-\r
-HELP: pprint-xml>string-but\r
-{ $values { "xml" "an XML document" } { "sensitive-tags" "a sequence of names" } { "string" string } }\r
-{ $description "Prettyprints an XML document, returning the result as a string and leaving the whitespace of the tags with names in sensitive-tags intact." }\r
-{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
-\r
-{ xml>string print-xml write-xml pprint-xml xml-reprint pprint-xml>string pprint-xml>string-but pprint-xml-but } related-words\r
-\r
-HELP: PROCESS:\r
-{ $syntax "PROCESS: word" }\r
-{ $values { "word" "a new word to define" } }\r
-{ $description "creates a new word to process XML tags" }\r
-{ $see-also POSTPONE: TAG: } ;\r
-\r
-HELP: TAG:\r
-{ $syntax "TAG: tag word definition... ;" }\r
-{ $values { "tag" "an xml tag name" } { "word" "an XML process" } }\r
-{ $description "defines what a process should do when it encounters a specific tag" }\r
-{ $examples { $code "PROCESS: x ( tag -- )\nTAG: a x drop \"hi\" write ;" } }\r
-{ $see-also POSTPONE: PROCESS: } ;\r
-HELP: build-tag*\r
-{ $values { "items" "sequence of elements" } { "name" "string" }\r
- { "tag" tag } }\r
-{ $description "builds a " { $link tag } " with the specified name, in the namespace \"\" and URL \"\" containing the children listed in item" }\r
-{ $see-also build-tag build-xml } ;\r
-\r
-HELP: build-tag\r
-{ $values { "item" "an element" } { "name" string } { "tag" tag } }\r
-{ $description "builds a " { $link tag } " with the specified name containing the single child item" }\r
-{ $see-also build-tag* build-xml } ;\r
-\r
-HELP: build-xml\r
-{ $values { "tag" tag } { "xml" "an XML document" } }\r
-{ $description "builds an XML document out of a tag" }\r
-{ $see-also build-tag* build-tag } ;\r
-\r
-HELP: tag\r
-{ $class-description "tuple representing an XML tag, delegating to a " { $link\r
-name } ", containing the slots attrs (an alist of names to strings) and children (a sequence). Tags implement the sequence protocol by acting like a sequence of its chidren, and the assoc protocol by acting like its attributes." }\r
-{ $see-also <tag> name contained-tag xml } ;\r
-\r
-HELP: <tag>\r
-{ $values { "name" "an XML tag name" }\r
- { "attrs" "an alist of names to strings" }\r
- { "children" sequence }\r
- { "tag" tag } }\r
-{ $description "constructs an XML " { $link tag } " with the name (not a string) and tag attributes specified in attrs and children specified" }\r
-{ $see-also tag <contained-tag> build-tag build-tag* } ;\r
-\r
-HELP: name\r
-{ $class-description "represents an XML name, with the fields space (a string representing the namespace, as written in the document, tag (a string of the actual name of the tag) and url (a string of the URL that the namespace points to)" }\r
-{ $see-also <name> tag } ;\r
-\r
-HELP: <name>\r
-{ $values { "space" "a string" } { "main" "a string" } { "url" "a string" }\r
- { "name" "an XML tag name" } }\r
-{ $description "creates a name tuple with the name-space space and the tag-name tag and the tag-url url." }\r
-{ $see-also name <tag> } ;\r
-\r
-HELP: contained-tag\r
-{ $class-description "delegates to tag representing a tag like <a/> with no contents. The tag attributes are accessed with tag-attrs" }\r
-{ $see-also tag <contained-tag> } ;\r
-\r
-HELP: <contained-tag>\r
-{ $values { "name" "an XML tag name" }\r
- { "attrs" "an alist from names to strings" }\r
- { "tag" tag } }\r
-{ $description "creates an empty tag (like <a/>) with the specified name and tag attributes. This delegates to tag" }\r
-{ $see-also contained-tag <tag> } ;\r
-\r
-HELP: xml\r
-{ $class-description "tuple representing an XML document, delegating to the main tag, containing the fields prolog (the header <?xml...?>), before (whatever comes between the prolog and the main tag) and after (whatever comes after the main tag)" }\r
-{ $see-also <xml> tag prolog } ;\r
-\r
-HELP: <xml>\r
-{ $values { "prolog" "an XML prolog" } { "before" "a sequence of XML elements" }\r
-{ "body" tag } { "after" "a sequence of XML elements" } { "xml" "an XML document" } }\r
-{ $description "creates an XML document, delegating to the main tag, with the specified prolog, before, and after" }\r
-{ $see-also xml <tag> } ;\r
-\r
-HELP: prolog\r
-{ $class-description "represents an XML prolog, with the tuple fields version (containing \"1.0\" or \"1.1\"), encoding (a string representing the encoding type), and standalone (t or f, whether the document is standalone without external entities)" }\r
-{ $see-also <prolog> xml } ;\r
-\r
-HELP: <prolog>\r
-{ $values { "version" "a string, 1.0 or 1.1" }\r
-{ "encoding" "a string" } { "standalone" "a boolean" } { "prolog" "an XML prolog" } }\r
-{ $description "creates an XML prolog tuple" }\r
-{ $see-also prolog <xml> } ;\r
-\r
-HELP: comment\r
-{ $class-description "represents a comment in XML. Has one slot, text, which contains the string of the comment" }\r
-{ $see-also <comment> } ;\r
-\r
-HELP: <comment>\r
-{ $values { "text" "a string" } { "comment" "a comment" } }\r
-{ $description "creates an XML comment tuple" }\r
-{ $see-also comment } ;\r
-\r
-HELP: instruction\r
-{ $class-description "represents an XML instruction, such as <?xsl stylesheet='foo.xml'?>. Contains one slot, text, which contains the string between the question marks." }\r
-{ $see-also <instruction> } ;\r
-\r
-HELP: <instruction>\r
-{ $values { "text" "a string" } { "instruction" "an XML instruction" } }\r
-{ $description "creates an XML parsing instruction, such as <?xsl stylesheet='foo.xml'?>." }\r
-{ $see-also instruction } ;\r
-\r
-HELP: names-match?\r
-{ $values { "name1" "a name" } { "name2" "a name" } { "?" "t or f" } }\r
-{ $description "checks to see if the two names match, that is, if all fields are equal, ignoring fields whose value is f in either name." }\r
-{ $example "USING: prettyprint xml.data ;" "T{ name f \"rpc\" \"methodCall\" f } T{ name f f \"methodCall\" \"http://www.xmlrpc.org/\" } names-match? ." "t" }\r
-{ $see-also name } ;\r
-\r
HELP: read-xml-chunk\r
{ $values { "stream" "an input stream" } { "seq" "a sequence of elements" } }\r
-{ $description "rather than parse a document, as " { $link read-xml } " does, this word parses and returns a sequence of XML elements (tags, strings, etc), ie a document fragment. This is useful for pieces of XML which may have more than one main tag." }\r
-{ $see-also write-xml-chunk read-xml } ;\r
-\r
-HELP: get-id\r
-{ $values { "tag" "an XML tag or document" } { "id" "a string" } { "elem" "an XML element or f" } }\r
-{ $description "finds the XML tag with the specified id, ignoring the namespace" }\r
-{ $see-also } ;\r
-\r
-HELP: process\r
-{ $values { "object" "an opener, closer, contained or text element" } }\r
-{ $description "takes an XML event and, using the XML stack, processes it and adds it to the tree" } ;\r
+{ $description "Rather than parse a document, as " { $link read-xml } " does, this word parses and returns a sequence of XML elements (tags, strings, etc.), i.e. a document fragment. This is useful for pieces of XML which may have more than one main tag." }
+{ $see-also read-xml } ;\r
\r
-HELP: sax\r
+HELP: each-element\r
{ $values { "stream" "an input stream" } { "quot" "a quotation ( xml-elem -- )" } }\r
-{ $description "parses the XML document, and whenever an event is encountered (a tag piece, comment, parsing instruction, directive or string element), the quotation is called with that event on the stack. The quotation has all responsibility to deal with the event properly, and it is advised that generic words be used in dispatching on the event class." }\r
+{ $description "Parses the XML document, and whenever an event is encountered (a tag piece, comment, parsing instruction, directive or string element), the quotation is called with that event on the stack. The quotation has all responsibility to deal with the event properly, and it is advised that generic words be used in dispatching on the event class." }\r
{ $notes "It is important to note that this is not SAX, merely an event-based XML view" }\r
{ $see-also read-xml } ;\r
\r
-HELP: opener\r
-{ $class-description "describes an opening tag, like <a>. Contains two slots, name and attrs containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }\r
-{ $see-also closer contained } ;\r
-\r
-HELP: closer\r
-{ $class-description "describes a closing tag, like </a>. Contains one slot, name, containing the tag's name. Usually, the name-url will be f." }\r
-{ $see-also opener contained } ;\r
-\r
-HELP: contained\r
-{ $class-description "represents a self-closing tag, like <a/>. Contains two slots, name and attrs containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }\r
-{ $see-also opener closer } ;\r
-\r
-HELP: parse-text\r
-{ $values { "string" "a string" } }\r
-{ $description "moves the pointer from the current spot to the beginning of the next tag, parsing the text underneath, returning the text element it passed. This parses XML entities like &bar; a and &" }\r
-{ $see-also parse-name } ;\r
-\r
-HELP: parse-name\r
-{ $values { "name" "an XML name" } }\r
-{ $description "parses a " { $link name } " from the input stream. Returns a name with only the name-space and name-tag defined, with name-url=f" }\r
-{ $see-also parse-text } ;\r
-\r
-HELP: make-tag\r
-{ $values { "tag" "an opener, closer or contained" } }\r
-{ $description "assuming the pointer is just past a <, this word parses until the next > and emits a tuple representing the tag parsed" }\r
-{ $see-also opener closer contained } ;\r
-\r
HELP: pull-xml\r
-{ $class-description "represents the state of a pull-parser for XML. Has one slot, scope, which is a namespace which contains all relevant state information." }\r
+{ $class-description "Represents the state of a pull-parser for XML. Has one slot, scope, which is a namespace which contains all relevant state information." }\r
{ $see-also <pull-xml> pull-event pull-elem } ;\r
\r
HELP: <pull-xml>\r
{ $values { "pull-xml" "a pull-xml tuple" } }\r
-{ $description "creates an XML pull-based parser which reads from " { $link input-stream } ", executing all initial XML commands to set up the parser." }\r
+{ $description "Creates an XML pull-based parser which reads from " { $link input-stream } ", executing all initial XML commands to set up the parser." }\r
{ $see-also pull-xml pull-elem pull-event } ;\r
\r
HELP: pull-elem\r
{ $values { "pull" "an XML pull parser" } { "xml-elem/f" "an XML tag, string, or f" } }\r
-{ $description "gets the next XML element from the given XML pull parser. Returns f upon exhaustion." }\r
+{ $description "Gets the next XML element from the given XML pull parser. Returns f upon exhaustion." }\r
{ $see-also pull-xml <pull-xml> pull-event } ;\r
\r
HELP: pull-event\r
{ $values { "pull" "an XML pull parser" } { "xml-event/f" "an XML tag event, string, or f" } }\r
-{ $description "gets the next XML event from the given XML pull parser. Returns f upon exhaustion." }\r
+{ $description "Gets the next XML event from the given XML pull parser. Returns f upon exhaustion." }\r
{ $see-also pull-xml <pull-xml> pull-elem } ;\r
\r
-HELP: write-xml-chunk\r
-{ $values { "object" "an XML element" } }\r
-{ $description "writes an XML element to " { $link output-stream } "." }\r
-{ $see-also write-xml-chunk write-xml } ;\r
-\r
-HELP: deep-tag-named\r
-{ $values { "tag" "an XML tag or document" } { "name/string" "an XML name or string representing a name" } { "matching-tag" tag } }\r
-{ $description "finds an XML tag with a matching name, recursively searching children and children of children" }\r
-{ $see-also tags-named tag-named deep-tags-named } ;\r
-\r
-HELP: deep-tags-named\r
-{ $values { "tag" "an XML tag or document" } { "name/string" "an XML name or string representing a name" } { "tags-seq" "a sequence of tags" } }\r
-{ $description "returns a sequence of all tags of a matching name, recursively searching children and children of children" }\r
-{ $see-also tag-named deep-tag-named tags-named } ;\r
-\r
-HELP: children>string\r
-{ $values { "tag" "an XML tag or document" } { "string" "a string" } }\r
-{ $description "concatenates the children of the tag, ignoring everything that's not a string" } ;\r
-\r
-HELP: children-tags\r
-{ $values { "tag" "an XML tag or document" } { "sequence" sequence } }\r
-{ $description "gets the children of the tag that are themselves tags" }\r
-{ $see-also first-child-tag } ;\r
-\r
-HELP: first-child-tag\r
-{ $values { "tag" "an XML tag or document" } { "tag" tag } }\r
-{ $description "returns the first child of the given tag that is a tag" }\r
-{ $see-also children-tags } ;\r
-\r
-HELP: multitags\r
-{ $class-description "XML parsing error describing the case where there is more than one main tag in a document. Contains no slots" } ;\r
-\r
-HELP: notags\r
-{ $class-description "XML parsing error describing the case where an XML document contains no main tag, or any tags at all" } ;\r
-\r
-HELP: extra-attrs\r
-{ $class-description "XML parsing error describing the case where the XML prolog (<?xml ...?>) contains attributes other than the three allowed ones, standalone, version and encoding. Contains one slot, attrs, which is a hashtable of all the extra attributes' names. Delegates to " { $link parsing-error } "." } ;\r
-\r
-HELP: nonexist-ns\r
-{ $class-description "XML parsing error describing the case where a namespace doesn't exist but it is used in a tag. Contains one slot, name, which contains the name of the undeclared namespace, and delegates to " { $link parsing-error } "." } ;\r
-\r
-HELP: not-yes/no\r
-{ $class-description "XML parsing error used to describe the case where standalone is set in the XML prolog to something other than 'yes' or 'no'. Delegates to " { $link parsing-error } " and contains one slot, text, which contains offending value." } ;\r
-\r
-HELP: unclosed\r
-{ $class-description "XML parsing error used to describe the case where the XML document contains classes which are not closed by the end of the document. Contains one slot, tags, a sequence of names." } ;\r
-\r
-HELP: mismatched\r
-{ $class-description "XML parsing error describing mismatched tags, eg <a></c>. Contains two slots: open is the name of the opening tag and close is the name of the closing tag. Delegates to " { $link parsing-error } " showing the location of the closing tag" } ;\r
-\r
-HELP: expected\r
-{ $class-description "XML parsing error describing when an expected token was not present. Delegates to " { $link parsing-error } ". Contains two slots, should-be, which has the expected string, and was, which has the actual string." } ;\r
-\r
-HELP: no-entity\r
-{ $class-description "XML parsing error describing the use of an undefined entity in a case where standalone is marked yes. Delegates to " { $link parsing-error } ". Contains one slot, thing, containing a string representing the entity." } ;\r
-\r
-HELP: open-tag\r
-{ $class-description "represents a tag that does have children, ie is not a contained tag" }\r
-{ $notes "the constructor used for this class is simply " { $link <tag> } "." }\r
-{ $see-also tag contained-tag } ;\r
+HELP: read-dtd\r
+{ $values { "stream" "an input stream" } { "dtd" dtd } }\r
+{ $description "Exhausts a stream, producing a " { $link dtd } " from the contents." } ;\r
\r
-HELP: tag-named\r
-{ $values { "tag" "an XML tag or document" }\r
- { "name/string" "an XML name or string representing the name" }\r
- { "matching-tag" tag } }\r
-{ $description "finds the first tag with matching name which is the direct child of the given tag" }\r
-{ $see-also deep-tags-named deep-tag-named tags-named } ;\r
+HELP: file>dtd\r
+{ $values { "filename" string } { "dtd" dtd } }\r
+{ $description "Reads a file in UTF-8, converting it into an XML " { $link dtd } "." } ;\r
\r
-HELP: tags-named\r
-{ $values { "tag" "an XML tag or document" }\r
- { "name/string" "an XML name or string representing the name" }\r
- { "tags-seq" "a sequence of tags" } }\r
-{ $description "finds all tags with matching name that are the direct children of the given tag" }\r
-{ $see-also deep-tag-named deep-tags-named tag-named } ;\r
+HELP: string>dtd\r
+{ $values { "string" string } { "dtd" dtd } }\r
+{ $description "Interprets a string as an XML " { $link dtd } "." } ;\r
\r
-HELP: state-parse\r
-{ $values { "stream" "an input stream" } { "quot" "a quotation ( -- )" } }\r
-{ $description "takes a stream and runs an imperative parser on it, allowing words like " { $link next } " to be used within the context of the stream." } ;\r
-\r
-HELP: pre/post-content\r
-{ $class-description "describes the error where a non-whitespace string is used before or after the main tag in an XML document. Contains two slots: string contains the offending string, and pre? is t if it occured before the main tag and f if it occured after" } ;\r
-\r
-HELP: unclosed-quote\r
-{ $class-description "describes the error where a quotation for an attribute value is opened but not closed before the end of the document." } ;\r
-\r
-HELP: bad-name\r
-{ $class-description "describes the error where a name is used, for example in an XML tag or attribute key, which is invalid." } ;\r
-\r
-HELP: quoteless-attr\r
-{ $class-description "describes the error where an attribute of an XML tag is missing quotes around a value." } ;\r
-\r
-HELP: entities\r
-{ $description "a hash table from default XML entity names (like & and <) to the characters they represent. This is automatically included when parsing any XML document." }\r
-{ $see-also html-entities } ;\r
-\r
-HELP: html-entities\r
-{ $description "a hash table from HTML entity names to their character values" }\r
-{ $see-also entities with-html-entities } ;\r
-\r
-HELP: with-entities\r
-{ $values { "entities" "a hash table of strings to chars" }\r
- { "quot" "a quotation ( -- )" } }\r
-{ $description "calls the quotation using the given table of entity values (symbolizing, eg, that &foo; represents CHAR: a) on top of the default XML entities" }\r
-{ $see-also with-html-entities } ;\r
-\r
-HELP: with-html-entities\r
-{ $values { "quot" "a quotation ( -- )" } }\r
-{ $description "calls the given quotation using HTML entity values" }\r
-{ $see-also html-entities with-entities } ;\r
+{ read-dtd file>dtd string>dtd } related-words\r
\r
ARTICLE: { "xml" "reading" } "Reading XML"\r
"The following words are used to read something into an XML document"\r
{ $subsection read-xml }\r
{ $subsection read-xml-chunk }\r
{ $subsection string>xml-chunk }\r
- { $subsection file>xml } ;\r
-\r
-ARTICLE: { "xml" "writing" } "Writing XML"\r
- "These words are used in implementing prettyprint"\r
- { $subsection write-xml-chunk }\r
- "These words are used to print XML normally"\r
- { $subsection xml>string }\r
- { $subsection write-xml }\r
- { $subsection print-xml }\r
- "These words are used to prettyprint XML"\r
- { $subsection pprint-xml>string }\r
- { $subsection pprint-xml>string-but }\r
- { $subsection pprint-xml }\r
- { $subsection pprint-xml-but }\r
- "This word reads and writes XML"\r
- { $subsection xml-reprint } ;\r
-\r
-ARTICLE: { "xml" "classes" } "XML data classes"\r
- "Data types that XML documents are made of:"\r
- { $subsection name }\r
- { $subsection tag }\r
- { $subsection contained-tag }\r
- { $subsection open-tag }\r
- { $subsection xml }\r
- { $subsection prolog }\r
- { $subsection comment }\r
- { $subsection instruction } ;\r
-\r
-ARTICLE: { "xml" "construct" } "XML data constructors"\r
- "These data types are constructed with:"\r
- { $subsection <name> }\r
- { $subsection <tag> }\r
- { $subsection <contained-tag> }\r
- { $subsection <xml> }\r
- { $subsection <prolog> }\r
- { $subsection <comment> }\r
- { $subsection <instruction> } ;\r
-\r
-ARTICLE: { "xml" "utils" } "XML processing utilities"\r
- "Utilities for processing XML include..."\r
- $nl\r
- "System sfor creating words which dispatch on XML tags:"\r
- { $subsection POSTPONE: PROCESS: }\r
- { $subsection POSTPONE: TAG: }\r
- "Getting parts of an XML document or tag:"\r
- $nl\r
- "Note: the difference between deep-tag-named and tag-named is that the former searches recursively among all children and children of children of the tag, while the latter only looks at the direct children, and is therefore more efficient."\r
- { $subsection tag-named }\r
- { $subsection tags-named }\r
- { $subsection deep-tag-named }\r
- { $subsection deep-tags-named }\r
- { $subsection get-id }\r
- "Words for simplified generation of XML:"\r
- { $subsection build-tag* }\r
- { $subsection build-tag }\r
- { $subsection build-xml }\r
- "Other relevant words:"\r
- { $subsection children>string }\r
- { $subsection children-tags }\r
- { $subsection first-child-tag }\r
- { $subsection names-match? }\r
- { $subsection assert-tag } ;\r
-\r
-ARTICLE: { "xml" "internal" } "Internals of the XML parser"\r
- "The XML parser creates its own parsing framework to process XML documents. The parser operates on streams. Important words involved in processing are:"\r
- { $subsection parse-text }\r
- { $subsection make-tag }\r
- { $subsection parse-name }\r
- { $subsection process }\r
- "The XML parser is implemented using the libs/state-parser module. For more information, see " { $link { "state-parser" "main" } } ;\r
+ { $subsection file>xml }\r
+ "To read a DTD:"\r
+ { $subsection read-dtd }\r
+ { $subsection file>dtd }\r
+ { $subsection string>dtd } ;\r
\r
ARTICLE: { "xml" "events" } "Event-based XML parsing"\r
- "In addition to DOM-style parsing based around " { $link read-xml } ", the XML module also provides SAX-style event-based parsing. This uses much of the same data structures as normal XML, with the exception of the classes " { $link xml } " and " { $link tag } " and as such, the articles " { $link { "xml" "classes" } } " and " { $link { "xml" "construct" } } " may be useful in learning how to process documents in this way. Other useful words are:"\r
- { $subsection sax }\r
+ "In addition to DOM-style parsing based around " { $link read-xml } ", the XML module also provides SAX-style event-based parsing. This uses much of the same data structures as normal XML, with the exception of the classes " { $link xml } " and " { $link tag } " and as such, the article " { $vocab-link "xml.data" } " may be useful in learning how to process documents in this way. Other useful words are:"\r
+ { $subsection each-element }\r
{ $subsection opener }\r
{ $subsection closer }\r
{ $subsection contained }\r
{ $subsection pull-event }\r
{ $subsection pull-elem } ;\r
\r
-ARTICLE: { "xml" "errors" } "XML parsing errors"\r
- "The XML module provides a rich and highly inspectable set of parsing errors. All XML errors are described by the union class " { $link xml-parse-error } " but there are many classes contained in that:"\r
- { $subsection multitags }\r
- { $subsection notags }\r
- { $subsection extra-attrs }\r
- { $subsection nonexist-ns }\r
- { $subsection not-yes/no }\r
- { $subsection unclosed }\r
- { $subsection mismatched }\r
- { $subsection expected }\r
- { $subsection no-entity }\r
- { $subsection pre/post-content }\r
- { $subsection unclosed-quote }\r
- { $subsection bad-name }\r
- { $subsection quoteless-attr }\r
- "Additionally, most of these errors delegate to " { $link parsing-error } " in order to provide more information"\r
- $nl\r
- "Note that, in parsing an XML document, only the first error is reported." ;\r
-\r
-ARTICLE: { "xml" "entities" } "XML entities"\r
- "When XML is parsed, entities like &foo; are replaced with the characters they represent. A few entities like & and < are defined by default, but more are available, and the set of entities can be customized. Below are some words involved in XML entities, defined in the vocabulary 'entities':"\r
- { $subsection entities }\r
- { $subsection html-entities }\r
- { $subsection with-entities }\r
- { $subsection with-html-entities } ;\r
-\r
ARTICLE: "xml" "XML parser"\r
"The " { $vocab-link "xml" } " vocabulary implements the XML 1.0 and 1.1 standards, converting strings of text into XML and vice versa."\r
{ $subsection { "xml" "reading" } }\r
- { $subsection { "xml" "writing" } }\r
- { $subsection { "xml" "classes" } }\r
- { $subsection { "xml" "construct" } }\r
- { $subsection { "xml" "utils" } }\r
- { $subsection { "xml" "internal" } }\r
{ $subsection { "xml" "events" } }\r
- { $subsection { "xml" "errors" } }\r
- { $subsection { "xml" "entities" } } ;\r
-\r
-IN: xml\r
+ { $vocab-subsection "Writing XML" "xml.writer" }\r
+ { $vocab-subsection "XML parsing errors" "xml.errors" }\r
+ { $vocab-subsection "XML entities" "xml.entities" }\r
+ { $vocab-subsection "XML data types" "xml.data" }\r
+ { $vocab-subsection "Utilities for processing XML" "xml.utilities" }\r
+ { $vocab-subsection "Dispatch on XML tag names" "xml.dispatch" } ;\r
\r
ABOUT: "xml"\r
-! Copyright (C) 2005, 2006 Daniel Ehrenberg
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays io io.encodings.binary io.files
-io.streams.string kernel namespaces sequences state-parser strings
-xml.backend xml.data xml.errors xml.tokenize ascii
-xml.writer ;
+io.streams.string kernel namespaces sequences strings io.encodings.utf8
+xml.data xml.errors xml.elements ascii xml.entities
+xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ;
IN: xml
-! -- Overall parser with data tree
+<PRIVATE
: add-child ( object -- )
xml-stack get peek second push ;
xml-stack get V{ { f V{ } } } =
[ bad-prolog ] unless drop ;
-M: instruction process
- xml-stack get length 1 =
- [ bad-instruction ] unless
- add-child ;
-
M: directive process
xml-stack get dup length 1 =
swap first second [ tag? ] contains? not and
<tag> add-child ;
: init-xml-stack ( -- )
- V{ } clone xml-stack set f push-xml ;
+ V{ } clone xml-stack set
+ extra-entities [ H{ } assoc-like ] change
+ f push-xml ;
: default-prolog ( -- prolog )
"1.0" "UTF-8" f <prolog> ;
SYMBOL: text-now?
+PRIVATE>
+
TUPLE: pull-xml scope ;
: <pull-xml> ( -- pull-xml )
[
] if text-now? set
] bind ;
+<PRIVATE
+
: done? ( -- ? )
xml-stack get length 1 = ;
[ (pull-elem) ] if
] if ;
+PRIVATE>
+
: pull-elem ( pull -- xml-elem/f )
[ init-xml-stack (pull-elem) ] with-scope ;
+<PRIVATE
+
: call-under ( quot object -- quot )
swap dup slip ; inline
-: sax-loop ( quot: ( xml-elem -- ) -- )
+: xml-loop ( quot: ( xml-elem -- ) -- )
parse-text call-under
- get-char [ make-tag call-under sax-loop ]
+ get-char [ make-tag call-under xml-loop ]
[ drop ] if ; inline recursive
-: sax ( stream quot: ( xml-elem -- ) -- )
+PRIVATE>
+
+: each-element ( stream quot: ( xml-elem -- ) -- )
swap [
reset-prolog init-ns-stack
start-document [ call-under ] when*
- sax-loop
- ] state-parse ; inline recursive
+ xml-loop
+ ] with-state ; inline
: (read-xml) ( -- )
start-document [ process ] when*
- [ process ] sax-loop ; inline
+ [ process ] xml-loop ; inline
: (read-xml-chunk) ( stream -- prolog seq )
[
done? [ unclosed ] unless
xml-stack get first second
prolog-data get swap
- ] state-parse ;
+ ] with-state ;
: read-xml ( stream -- xml )
- #! Produces a tree of XML nodes
- (read-xml-chunk) make-xml-doc ;
+ 0 depth
+ [ (read-xml-chunk) make-xml-doc ] with-variable ;
: read-xml-chunk ( stream -- seq )
- (read-xml-chunk) nip ;
+ 1 depth
+ [ (read-xml-chunk) nip ] with-variable ;
: string>xml ( string -- xml )
- <string-reader> read-xml ;
+ t string-input?
+ [ <string-reader> read-xml ] with-variable ;
: string>xml-chunk ( string -- xml )
t string-input?
[ <string-reader> read-xml-chunk ] with-variable ;
: file>xml ( filename -- xml )
- ! Autodetect encoding!
binary <file-reader> read-xml ;
-: xml-reprint ( string -- )
- string>xml print-xml ;
+: read-dtd ( stream -- dtd )
+ [
+ reset-prolog
+ H{ } clone extra-entities set
+ take-internal-subset
+ ] with-state ;
+
+: file>dtd ( filename -- dtd )
+ utf8 <file-reader> read-dtd ;
+: string>dtd ( string -- dtd )
+ <string-reader> read-dtd ;
-USING: xmode.tokens xmode.marker xmode.catalog kernel
+USING: xmode.tokens xmode.marker xmode.catalog kernel locals
html.elements io io.files sequences words io.encodings.utf8
-namespaces xml.entities accessors ;
+namespaces xml.entities accessors xml.interpolate locals xml.writer ;
IN: xmode.code2html
-: htmlize-tokens ( tokens -- )
+: htmlize-tokens ( tokens -- xml )
[
[ str>> ] [ id>> ] bi [
- <span name>> =class span> escape-string write </span>
- ] [
- escape-string write
- ] if*
- ] each ;
+ name>> swap
+ [XML <span class=<->><-></span> XML]
+ ] [ ] if*
+ ] map ;
-: htmlize-line ( line-context line rules -- line-context' )
+: htmlize-line ( line-context line rules -- line-context' xml )
tokenize-line htmlize-tokens ;
-: htmlize-lines ( lines mode -- )
- f swap load-mode [ htmlize-line nl ] curry reduce drop ;
+: htmlize-lines ( lines mode -- xml )
+ [ f ] 2dip load-mode [ htmlize-line ] curry map nip ;
-: default-stylesheet ( -- )
- <style>
- "resource:basis/xmode/code2html/stylesheet.css"
- utf8 file-contents escape-string write
- </style> ;
+: default-stylesheet ( -- xml )
+ "resource:basis/xmode/code2html/stylesheet.css"
+ utf8 file-contents
+ [XML <style><-></style> XML] ;
-: htmlize-stream ( path stream -- )
- lines swap
- <html>
+:: htmlize-stream ( path stream -- xml )
+ stream lines
+ [ "" ] [ first find-mode path swap htmlize-lines ]
+ if-empty :> input
+ default-stylesheet :> stylesheet
+ <XML <html>
<head>
- default-stylesheet
- <title> dup escape-string write </title>
+ <-stylesheet->
+ <title><-path-></title>
</head>
<body>
- <pre>
- over empty?
- [ 2drop ]
- [ over first find-mode htmlize-lines ] if
- </pre>
+ <pre><-input-></pre>
</body>
- </html> ;
+ </html> XML> ;
: htmlize-file ( path -- )
dup utf8 [
dup ".html" append utf8 [
- input-stream get htmlize-stream
+ input-stream get htmlize-stream write-xml
] with-file-writer
] with-file-reader ;
[ ch>upper ] dip rules>> at ?push-all ;
: get-rules ( char ruleset -- seq )
- f -rot [ get-char-rules ] keep get-always-rules ;
+ [ f ] 2dip [ get-char-rules ] keep get-always-rules ;
GENERIC: handle-rule-start ( match-count rule -- )
: child-tags ( tag -- seq ) children>> [ tag? ] filter ;
: map-find ( seq quot -- result elt )
- f -rot
+ [ f ] 2dip
'[ nip @ dup ] find
[ [ drop f ] unless ] dip ; inline
-! Copyright (C) 2007 Daniel Ehrenberg, Slava Pestov, and Doug Coleman
+! Copyright (C) 2007, 2009 Daniel Ehrenberg, Slava Pestov, and Doug Coleman
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax kernel sequences
-sequences.private namespaces math quotations ;
+sequences.private namespaces math quotations assocs.private ;
IN: assocs
ARTICLE: "alists" "Association lists"
{ $subsection enum }
{ $subsection <enum> }
"Inverting a permutation using enumerations:"
-{ $example "USING: assocs sorting prettyprint ;" ": invert <enum> >alist sort-values keys ;" "{ 2 0 4 1 3 } invert ." "{ 1 3 0 4 2 }" } ;
+{ $example "USING: assocs sorting prettyprint ;" "IN: scratchpad" ": invert ( perm -- perm' )" " <enum> >alist sort-values keys ;" "{ 2 0 4 1 3 } invert ." "{ 1 3 0 4 2 }" } ;
HELP: enum
{ $class-description "An associative structure which wraps a sequence and maps integers to the corresponding elements of the sequence."
{ $subsection assoc-each }
{ $subsection assoc-find }
{ $subsection assoc-map }
-{ $subsection assoc-push-if }
{ $subsection assoc-filter }
{ $subsection assoc-filter-as }
{ $subsection assoc-contains? }
{ $subsection cache }
{ $subsection map>assoc }
{ $subsection assoc>map }
-{ $subsection assoc-map-as }
-{ $subsection search-alist }
-"Utility word:"
-{ $subsection assoc-pusher } ;
+{ $subsection assoc-map-as } ;
ARTICLE: "assocs" "Associative mapping operations"
"An " { $emphasis "associative mapping" } ", abbreviated " { $emphasis "assoc" } ", is a collection of key/value pairs which provides efficient lookup and storage indexed by key."
{ assoc-map assoc-map-as } related-words
-HELP: assoc-push-if
-{ $values { "accum" "a resizable mutable sequence" } { "quot" { $quotation "( key value -- ? )" } } { "key" object } { "value" object } }
-{ $description "If the quotation yields true when applied to the key/value pair, adds the key/value pair at the end of " { $snippet "accum" } "." } ;
-
HELP: assoc-filter
{ $values { "assoc" assoc } { "quot" { $quotation "( key value -- ? )" } } { "subassoc" "a new assoc" } }
{ $description "Outputs an assoc of the same type as " { $snippet "assoc" } " consisting of all entries for which the predicate quotation yields true." } ;
{ $description "Applies the quotation to each entry in the input assoc and collects the results in a new assoc of the stame type as the exemplar." }
{ $examples { $example "USING: prettyprint assocs hashtables math ;" " H{ { 1 2 } { 3 4 } } [ sq ] { } assoc-map-as ." "{ { 1 4 } { 3 16 } }" } } ;
-HELP: assoc-pusher
-{ $values
- { "quot" "a predicate quotation" }
- { "quot'" quotation } { "accum" assoc } }
-{ $description "Creates a new " { $snippet "assoc" } " to accumulate the key/value pairs which return true for a predicate. Returns a new quotation which accepts a pair of object to be tested and stored in the accumulator if the test yields true. The accumulator is left on the stack for convenience." }
-{ $example "! Find only the pairs that sum to 5:" "USING: prettyprint assocs math kernel ;"
- "{ { 1 2 } { 2 3 } { 3 4 } } [ + 5 = ] assoc-pusher [ assoc-each ] dip ."
- "V{ { 2 3 } }"
-}
-{ $notes "Used to implement the " { $link assoc-filter } " word." } ;
-
-
HELP: extract-keys
{ $values
{ "seq" sequence } { "assoc" assoc }
{ $values
{ "key" object } { "alist" "an array of key/value pairs" }
{ "pair/f" "a key/value pair" } { "i/f" integer } }
-{ $description "Performs an in-order traversal of a " { $snippet "alist" } " and stops when the key is matched or the end of the " { $snippet "alist" } " has been reached. If there is no match, both outputs are " { $link f } "." }
-{ $examples { $example "USING: prettyprint assocs kernel ;"
+{ $description "Iterates over " { $snippet "alist" } " and stops when the key is matched or the end of the " { $snippet "alist" } " has been reached. If there is no match, both outputs are " { $link f } "." }
+{ $notes "This word is used to implement " { $link at* } " and " { $link set-at } " on sequences, and should not be called directly." }
+{ $examples { $example "USING: prettyprint assocs.private kernel ;"
"3 { { 1 2 } { 3 4 } } search-alist [ . ] bi@"
"{ 3 4 }\n1"
- } { $example "USING: prettyprint assocs kernel ;"
+ } { $example "USING: prettyprint assocs.private kernel ;"
"6 { { 1 2 } { 3 4 } } search-alist [ . ] bi@"
"f\nf"
}
[ "x" ] [
"a" H{ { "a" "x" } } at-default
+] unit-test
+
+[ H{ { "b" [ 2 ] } { "d" [ 4 ] } } H{ { "a" [ 1 ] } { "c" [ 3 ] } } ] [
+ H{
+ { "a" [ 1 ] }
+ { "b" [ 2 ] }
+ { "c" [ 3 ] }
+ { "d" [ 4 ] }
+ } [ nip first even? ] assoc-partition
] unit-test
\ No newline at end of file
MIXIN: assoc
GENERIC: at* ( key assoc -- value/f ? )
+GENERIC: value-at* ( value assoc -- key/f ? )
GENERIC: set-at ( value key assoc -- )
GENERIC: new-assoc ( capacity exemplar -- newassoc )
GENERIC: delete-at ( key assoc -- )
GENERIC: clear-assoc ( assoc -- )
GENERIC: assoc-size ( assoc -- n )
GENERIC: assoc-like ( assoc exemplar -- newassoc )
+GENERIC: assoc-clone-like ( assoc exemplar -- newassoc )
+GENERIC: >alist ( assoc -- newassoc )
M: assoc assoc-like drop ;
-GENERIC: assoc-clone-like ( assoc exemplar -- newassoc )
-
-GENERIC: >alist ( assoc -- newassoc )
+<PRIVATE
: (assoc-each) ( assoc quot -- seq quot' )
[ >alist ] dip [ first2 ] prepose ; inline
+: (assoc-stack) ( key i seq -- value )
+ over 0 < [
+ 3drop f
+ ] [
+ 3dup nth-unsafe at*
+ [ [ 3drop ] dip ] [ drop [ 1- ] dip (assoc-stack) ] if
+ ] if ; inline recursive
+
+: search-alist ( key alist -- pair/f i/f )
+ [ first = ] with find swap ; inline
+
+: substituter ( assoc -- quot )
+ [ dupd at* [ nip ] [ drop ] if ] curry ; inline
+
+: with-assoc ( assoc quot: ( value key -- assoc ) -- quot: ( key value -- ) )
+ curry [ swap ] prepose ; inline
+
+PRIVATE>
+
: assoc-find ( assoc quot -- key value ? )
(assoc-each) find swap [ first2 t ] [ drop f f f ] if ; inline
: assoc-map ( assoc quot -- newassoc )
over assoc-map-as ; inline
-: assoc-push-if ( key value quot accum -- )
- [ 2keep ] dip [ [ 2array ] dip push ] 3curry when ; inline
-
-: assoc-pusher ( quot -- quot' accum )
- V{ } clone [ [ assoc-push-if ] 2curry ] keep ; inline
-
: assoc-filter-as ( assoc quot exemplar -- subassoc )
- [ assoc-pusher [ assoc-each ] dip ] dip assoc-like ; inline
+ [ (assoc-each) filter ] dip assoc-like ; inline
: assoc-filter ( assoc quot -- subassoc )
over assoc-filter-as ; inline
+: assoc-partition ( assoc quot -- true-assoc false-assoc )
+ [ (assoc-each) partition ] [ drop ] 2bi
+ tuck [ assoc-like ] 2bi@ ; inline
+
: assoc-contains? ( assoc quot -- ? )
assoc-find 2nip ; inline
2dup at* [ 2nip ] [ 2drop ] if ; inline
M: assoc assoc-clone-like ( assoc exemplar -- newassoc )
- over assoc-size swap new-assoc
- [ [ swapd set-at ] curry assoc-each ] keep ;
+ [ dup assoc-size ] dip new-assoc
+ [ [ set-at ] with-assoc assoc-each ] keep ;
: keys ( assoc -- keys )
[ drop ] { } assoc>map ;
[ at* ] 2keep delete-at ;
: rename-at ( newkey key assoc -- )
- [ delete-at* ] keep [ swapd set-at ] curry [ 2drop ] if ;
+ [ delete-at* ] keep [ set-at ] with-assoc [ 2drop ] if ;
: assoc-empty? ( assoc -- ? )
- assoc-size zero? ;
-
-: (assoc-stack) ( key i seq -- value )
- over 0 < [
- 3drop f
- ] [
- 3dup nth-unsafe at*
- [ [ 3drop ] dip ] [ drop [ 1- ] dip (assoc-stack) ] if
- ] if ; inline recursive
+ assoc-size 0 = ;
: assoc-stack ( key seq -- value )
[ length 1- ] keep (assoc-stack) ; flushable
: assoc-subset? ( assoc1 assoc2 -- ? )
- [ swapd at* [ = ] [ 2drop f ] if ] curry assoc-all? ;
+ [ at* [ = ] [ 2drop f ] if ] with-assoc assoc-all? ;
: assoc= ( assoc1 assoc2 -- ? )
[ assoc-subset? ] [ swap assoc-subset? ] 2bi and ;
: assoc-hashcode ( n assoc -- code )
- [
- [ over ] dip hashcode* 2/ [ dupd hashcode* ] dip bitxor
- ] { } assoc>map hashcode* ;
+ >alist hashcode* ;
: assoc-intersect ( assoc1 assoc2 -- intersection )
swap [ nip key? ] curry assoc-filter ;
: update ( assoc1 assoc2 -- )
- swap [ swapd set-at ] curry assoc-each ;
+ swap [ set-at ] with-assoc assoc-each ;
: assoc-union ( assoc1 assoc2 -- union )
[ [ [ assoc-size ] bi@ + ] [ drop ] 2bi new-assoc ] 2keep
: remove-all ( assoc seq -- subseq )
swap [ key? not ] curry filter ;
-: substituter ( assoc -- quot )
- [ dupd at* [ nip ] [ drop ] if ] curry ; inline
-
: substitute-here ( seq assoc -- )
substituter change-each ;
: extract-keys ( seq assoc -- subassoc )
[ [ dupd at ] curry ] keep map>assoc ;
-GENERIC: value-at* ( value assoc -- key/f ? )
-
M: assoc value-at* swap [ = nip ] curry assoc-find nip ;
: value-at ( value assoc -- key/f ) value-at* drop ;
: unzip ( assoc -- keys values )
dup assoc-empty? [ drop { } { } ] [ >alist flip first2 ] if ;
-: search-alist ( key alist -- pair/f i/f )
- [ first = ] with find swap ; inline
-
M: sequence at*
search-alist [ second t ] [ f ] if ;
M: sequence clear-assoc delete-all ;
M: sequence delete-at
- tuck search-alist nip
+ [ nip ] [ search-alist nip ] 2bi
[ swap delete-nth ] [ drop ] if* ;
M: sequence assoc-size length ;
! Now we have ( syntax-quot arch-quot layouts-quot ) on the stack
! Bring up a bare cross-compiling vocabulary.
-"syntax" vocab vocab-words bootstrap-syntax set
-H{ } clone dictionary set
-H{ } clone new-classes set
-H{ } clone changed-definitions set
-H{ } clone changed-generics set
-H{ } clone remake-generics set
-H{ } clone forgotten-definitions set
-H{ } clone root-cache set
-H{ } clone source-files set
-H{ } clone update-map set
-H{ } clone implementors-map set
+"syntax" vocab vocab-words bootstrap-syntax set {
+ dictionary
+ new-classes
+ changed-definitions changed-generics
+ remake-generics forgotten-definitions
+ root-cache source-files update-map implementors-map
+} [ H{ } clone swap set ] each
+
init-caches
! Vocabulary for slot accessors
"vocabulary"
{ "def" { "quotation" "quotations" } initial: [ ] }
"props"
- { "compiled" read-only }
+ { "optimized" read-only }
{ "counter" { "fixnum" "math" } }
{ "sub-primitive" read-only }
} define-builtin
! using the host image's hashing algorithms. We don't
! use each-object here since the catch stack isn't yet
! set up.
+ gc
begin-scan
[ hashtable? ] pusher [ (each-object) ] dip
end-scan
: min-class ( class seq -- class/f )\r
over [ classes-intersect? ] curry filter\r
[ drop f ] [\r
- tuck [ class<= ] with all? [ peek ] [ drop f ] if\r
+ [ nip ] [ [ class<= ] with all? ] 2bi [ peek ] [ drop f ] if\r
] if-empty ;\r
\r
GENERIC: (flatten-class) ( class -- )\r
dup "predicate" word-prop
dup length 1 = [
first
- tuck "predicating" word-prop =
+ [ nip ] [ "predicating" word-prop = ] 2bi
[ forget ] [ drop ] if
] [ 2drop ] if ;
#! class-usages of the member, now that it's been added.
[ 2drop ] [
[ [ suffix ] change-mixin-class ] 2keep
- tuck [ new-class? ] either? [
+ [ nip ] [ [ new-class? ] either? ] 2bi [
update-classes/new
] [
update-classes
{ $subsection singleton-class? }
{ $subsection singleton-class } ;
-HELP: SINGLETON:
-{ $syntax "SINGLETON: class" }
-{ $values
- { "class" "a new singleton to define" }
-}
-{ $description
- "Defines a new singleton class. The class word itself is the sole instance of the singleton class."
-}
-{ $examples
- { $example "USING: classes.singleton kernel io ;" "IN: scratchpad" "SINGLETON: foo\nGENERIC: bar ( obj -- )\nM: foo bar drop \"a foo!\" print ;\nfoo bar" "a foo!" }
-} ;
-
HELP: define-singleton-class
{ $values { "word" "a new word" } }
{ $description
$nl
"The second is to use ad-hoc slot polymorphism. If two classes define a slot with the same name, then code which uses " { $link "accessors" } " can operate on instances of both objects, assuming the values stored in that slot implement a common protocol. This allows code to be shared without creating contrieved relationships between classes."
{ $heading "Anti-pattern #3: subclassing to override a method definition" }
-"While method overriding is a very powerful tool, improper use can cause tight coupling of code and lead to difficulty in testing and refactoring. Subclassing should not be used as a means of ``monkey patching'' methods to fix bugs and add features. Only subclass from classes which were designed to be inherited from, and when writing classes of your own which are intended to be subclassed, clearly document that subclasses may and may not do. This includes construction policy; document whether subclasses should use " { $link new } ", " { $link boa } ", or a custom parametrized constructor."
+"While method overriding is a very powerful tool, improper use can cause tight coupling of code and lead to difficulty in testing and refactoring. Subclassing should not be used as a means of “monkey patching” methods to fix bugs and add features. Only subclass from classes which were designed to be inherited from, and when writing classes of your own which are intended to be subclassed, clearly document what subclasses may and may not do. This includes construction policy; document whether subclasses should use " { $link new } ", " { $link boa } ", or a custom parametrized constructor."
{ $see-also "parametrized-constructors" } ;
ARTICLE: "tuple-subclassing" "Tuple subclassing"
HELP: boa
{ $values { "..." "slot values" } { "class" tuple-class } { "tuple" tuple } }
{ $description "Creates a new instance of " { $snippet "class" } " and fill in the slots from the stack, with the top-most stack element being stored in the right-most slot." }
-{ $notes "The name " { $snippet "boa" } " is shorthand for ``by order of arguments'', and ``BOA constructor'' is a pun on ``boa constrictor''." }
+{ $notes "The name " { $snippet "boa" } " is shorthand for “by order of arguments”, and “BOA constructor” is a pun on “boa constrictor”." }
{ $errors "Throws an error if the slot values do not match class declarations on slots (see" { $link "tuple-declarations" } ")." } ;
IN: compiler.units.tests
-USING: definitions compiler.units tools.test arrays sequences ;
+USING: definitions compiler.units tools.test arrays sequences words kernel
+accessors namespaces fry ;
[ flushed-dependency ] [ f flushed-dependency strongest-dependency ] unit-test
[ flushed-dependency ] [ flushed-dependency f strongest-dependency ] unit-test
[ inlined-dependency ] [ called-dependency inlined-dependency strongest-dependency ] unit-test
[ flushed-dependency ] [ called-dependency flushed-dependency strongest-dependency ] unit-test
[ called-dependency ] [ called-dependency f strongest-dependency ] unit-test
+
+! Non-optimizing compiler bugs
+[ 1 1 ] [
+ "A" "B" <word> [ [ 1 ] dip ] >>def dup f 2array 1array modify-code-heap
+ 1 swap execute
+] unit-test
+
+[ "A" "B" ] [
+ gensym "a" set
+ gensym "b" set
+ [
+ "a" get [ "A" ] define
+ "b" get "a" get '[ _ execute ] define
+ ] with-compilation-unit
+ "b" get execute
+ [
+ "a" get [ "B" ] define
+ ] with-compilation-unit
+ "b" get execute
+] unit-test
\ No newline at end of file
dup dup changed-vocabs update ;
: compile ( words -- )
- recompile-hook get call
- dup [ drop crossref? ] assoc-contains?
- modify-code-heap ;
+ recompile-hook get call modify-code-heap ;
SYMBOL: outdated-tuples
SYMBOL: update-tuples-hook
call-recompile-hook
call-update-tuples-hook
unxref-forgotten-definitions
- dup [ drop crossref? ] assoc-contains? modify-code-heap ;
+ modify-code-heap ;
: with-nested-compilation-unit ( quot -- )
[
USING: help.markup help.syntax kernel kernel.private
continuations.private vectors arrays namespaces
-assocs words quotations lexer sequences ;
+assocs words quotations lexer sequences math ;
IN: continuations
ARTICLE: "errors-restartable" "Restartable errors"
$nl
"In most other cases, " { $link cleanup } " should be used instead to handle an error and rethrow it automatically."
{ $heading "Anti-pattern #3: Dropping and rethrowing" }
-"Do not use " { $link recover } " to handle an error by dropping it and throwing a new error. By losing the original error message, you signal to the user that something failed without leaving any indication of what actually went wrong. Either wrap the error in a new error containing additional information, or rethrow the original error. A more subtle form of this is using " { $link throw } " instead of " { $link rethrow } ". The " { $link throw } " word should only be used when throwing new errors, and never when rethrowing errors that have been caught."
+"Do not use " { $link recover } " to handle an error by dropping it and throwing a new error. By losing the original error message, you signal to the user that something failed without leaving any indication of what actually went wrong. Either wrap the error in a new error containing additional information, or rethrow the original error. A more subtle form of this is using " { $link throw } " instead of " { $link rethrow } ". The " { $link throw } " word should only be used when throwing new errors, and never when rethrowing errors that have been caught."
{ $heading "Anti-pattern #4: Logging and rethrowing" }
"If you are going to rethrow an error, do not log a message. If you do so, the user will see two log messages for the same error, which will clutter logs without adding any useful information." ;
HELP: retry
{ $values
- { "quot" quotation } { "n" null }
+ { "quot" quotation } { "n" integer }
}
{ $description "Tries the quotation up to " { $snippet "n" } " times until it returns true. Retries the quotation if an exception is thrown or if the quotation returns " { $link f } ". The quotation is expected to have side effects that may fail, such as generating a random name for a new file until successful." }
{ $examples
+ "Try to get a 0 as a random number:"
{ $unchecked-example "USING: continuations math prettyprint ;"
- "[ 5 random 0 = ] retry t"
+ "[ 5 random 0 = ] 5 retry t"
"t"
}
} ;
ERROR: bad-effect ;
: parse-effect-token ( end -- token/f )
- scan tuck = [ drop f ] [
+ scan [ nip ] [ = ] 2bi [ drop f ] [
dup { f "(" "((" } member? [ bad-effect ] [
":" ?tail [
scan-word {
"methods" word-prop keys sort-classes ;
: specific-method ( class generic -- method/f )
- tuck order min-class dup [ swap method ] [ 2drop f ] if ;
+ [ nip ] [ order min-class ] 2bi
+ dup [ swap method ] [ 2drop f ] if ;
GENERIC: effective-method ( generic -- method )
[ init-hash ] [ array>> [ drop ((empty)) ] change-each ] bi ;
M: hashtable delete-at ( key hash -- )
- tuck key@ [
+ [ nip ] [ key@ ] 2bi [
[ ((tombstone)) dup ] 2dip set-nth-pair
hash-deleted+
] [
-! Copyright (C) 2007, 2008 Slava Pestov.
+! Copyright (C) 2007, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: init kernel system namespaces io io.encodings
io.encodings.utf8 init assocs splitting alien ;
+++ /dev/null
-Daniel Ehrenberg
+++ /dev/null
-USING: help.syntax help.markup ;
-IN: io.encodings.binary
-
-HELP: binary
-{ $class-description "Encoding descriptor for binary I/O." } ;
-
-ARTICLE: "io.encodings.binary" "Binary encoding"
-"Making an encoded stream with the binary encoding is a no-op; streams with this encoding deal with byte-arrays, not strings."
-{ $subsection binary } ;
-
-ABOUT: "io.encodings.binary"
+++ /dev/null
-! Copyright (C) 2008 Daniel Ehrenberg.
-! See http://factorcode.org/license.txt for BSD license.
-USING: io.encodings kernel ;
-IN: io.encodings.binary
-
-SINGLETON: binary
-M: binary <encoder> drop ;
-M: binary <decoder> drop ;
+++ /dev/null
-Dummy encoding for binary I/O
{ $description "A code point that replaces input that could not be decoded. The presence of this character in the decoded data usually signifies an error." } ;
ARTICLE: "encodings-descriptors" "Encoding descriptors"
-"An encoding descriptor is something which can be used for input or output streams to encode or decode bytes stored in a certain representation. It must conform to the " { $link "encodings-protocol" } ". Encodings which you can use are defined in the following vocabularies:"
+"An encoding descriptor is something which can be used with binary input or output streams to encode or decode bytes stored in a certain representation. It must conform to the " { $link "encodings-protocol" } ". Encodings which you can use are defined in the following vocabularies:"
{ $subsection "io.encodings.binary" }
{ $subsection "io.encodings.utf8" }
{ $subsection "io.encodings.utf16" }
{ $subsection <decoder> } ;
ARTICLE: "io.encodings" "I/O encodings"
-"The " { $vocab-link "io.encodings" } " vocabulary provides utilities for encoding and decoding bytes that represent text. Both strings and streams may be encoded."
+"The " { $vocab-link "io.encodings" } " vocabulary provides utilities for encoding and decoding bytes that represent text. Encodings can be used in the following situations:"
+{ $list
+ "With binary input streams, to convert bytes to characters"
+ "With binary output streams, to convert characters to bytes"
+ "With byte arrays, to convert bytes to characters"
+ "With strings, to convert characters to bytes"
+}
{ $subsection "encodings-descriptors" }
{ $subsection "encodings-constructors" }
{ $subsection "io.encodings.string" }
{ $subsection re-decode }
"Combinators to change the encoding:"
{ $subsection with-encoded-output }
-{ $subsection with-decoded-input } ;
+{ $subsection with-decoded-input }
+{ $see-also "encodings-introduction" "stream-elements" } ;
ABOUT: "io.encodings"
USING: help.markup help.syntax quotations hashtables kernel
-classes strings continuations destructors math ;
+classes strings continuations destructors math byte-arrays ;
IN: io
HELP: stream-readln
$io-error ;
HELP: stream-read1
-{ $values { "stream" "an input stream" } { "ch/f" "a character or " { $link f } } }
-{ $contract "Reads a character of input from the stream. Outputs " { $link f } " on stream exhaustion." }
+{ $values { "stream" "an input stream" } { "elt" "an element or " { $link f } } }
+{ $contract "Reads an element from the stream. Outputs " { $link f } " on stream exhaustion." }
{ $notes "Most code only works on one stream at a time and should instead use " { $link read1 } "; see " { $link "stdio" } "." }
$io-error ;
HELP: stream-read
-{ $values { "n" "a non-negative integer" } { "stream" "an input stream" } { "str/f" "a string or " { $link f } } }
-{ $contract "Reads " { $snippet "n" } " characters of input from the stream. Outputs a truncated string or " { $link f } " on stream exhaustion." }
+{ $values { "n" "a non-negative integer" } { "stream" "an input stream" } { "seq" { $or byte-array string f } } }
+{ $contract "Reads " { $snippet "n" } " elements from the stream. Outputs a truncated sequence or " { $link f } " on stream exhaustion." }
{ $notes "Most code only works on one stream at a time and should instead use " { $link read } "; see " { $link "stdio" } "." }
$io-error ;
HELP: stream-read-until
-{ $values { "seps" string } { "stream" "an input stream" } { "str/f" "a string or " { $link f } } { "sep/f" "a character or " { $link f } } }
-{ $contract "Reads characters from the stream, until the first occurrence of a separator character, or stream exhaustion. In the former case, the separator character is pushed on the stack, and is not part of the output string. In the latter case, the entire stream contents are output, along with " { $link f } "." }
+{ $values { "seps" string } { "stream" "an input stream" } { "seq" { $or byte-array string f } } { "sep/f" "a character or " { $link f } } }
+{ $contract "Reads elements from the stream, until the first occurrence of a separator character, or stream exhaustion. In the former case, the separator is pushed on the stack, and is not part of the output string. In the latter case, the entire stream contents are output, along with " { $link f } "." }
{ $notes "Most code only works on one stream at a time and should instead use " { $link read-until } "; see " { $link "stdio" } "." }
$io-error ;
HELP: stream-read-partial
{ $values
- { "n" integer } { "stream" "an input stream" }
- { "str/f" "a string or " { $link f } } }
-{ $description "Reads at most " { $snippet "n" } " characters from a stream and returns up to that many characters without blocking. If no characters are available, blocks until some are and returns them." } ;
+ { "n" "a non-negative integer" } { "stream" "an input stream" }
+ { "seq" { $or byte-array string f } } }
+{ $description "Reads at most " { $snippet "n" } " elements from a stream and returns up to that many elements without blocking. If no elements are available, blocks until some are and returns them." } ;
HELP: stream-write1
-{ $values { "ch" "a character" } { "stream" "an output stream" } }
-{ $contract "Writes a character of output to the stream. If the stream does buffering, output may not be performed immediately; use " { $link stream-flush } " to force output." }
+{ $values { "elt" "an element" } { "stream" "an output stream" } }
+{ $contract "Writes an element to the stream. If the stream does buffering, output may not be performed immediately; use " { $link stream-flush } " to force output." }
{ $notes "Most code only works on one stream at a time and should instead use " { $link write1 } "; see " { $link "stdio" } "." }
$io-error ;
HELP: stream-write
-{ $values { "str" string } { "stream" "an output stream" } }
-{ $contract "Writes a string of output to the stream. If the stream does buffering, output may not be performed immediately; use " { $link stream-flush } " to force output." }
+{ $values { "seq" "a byte array or string" } { "stream" "an output stream" } }
+{ $contract "Writes a sequence of elements to the stream. If the stream does buffering, output may not be performed immediately; use " { $link stream-flush } " to force output." }
{ $notes "Most code only works on one stream at a time and should instead use " { $link write } "; see " { $link "stdio" } "." }
$io-error ;
{ $notes "Most code only works on one stream at a time and should instead use " { $link nl } "; see " { $link "stdio" } "." }
$io-error ;
-
HELP: stream-print
{ $values { "str" string } { "stream" "an output stream" } }
{ $description "Writes a newline-terminated string." }
$io-error ;
HELP: read1
-{ $values { "ch/f" "a character or " { $link f } } }
-{ $description "Reads a character of input from " { $link input-stream } ". Outputs " { $link f } " on stream exhaustion." }
+{ $values { "elt" "an element or " { $link f } } }
+{ $description "Reads an element from " { $link input-stream } ". Outputs " { $link f } " on stream exhaustion." }
$io-error ;
HELP: read
-{ $values { "n" "a non-negative integer" } { "str/f" "a string or " { $link f } } }
-{ $description "Reads " { $snippet "n" } " characters of input from " { $link input-stream } ". Outputs a truncated string or " { $link f } " on stream exhaustion." }
+{ $values { "n" "a non-negative integer" } { "seq" { $or byte-array string f } } }
+{ $description "Reads " { $snippet "n" } " elements from " { $link input-stream } ". If there is no input available, outputs " { $link f } ". If there are less than " { $snippet "n" } " elements available, outputs a sequence shorter than " { $snippet "n" } " in length." }
$io-error ;
HELP: read-until
-{ $values { "seps" string } { "str/f" "a string or " { $link f } } { "sep/f" "a character or " { $link f } } }
-{ $contract "Reads characters from " { $link input-stream } ". until the first occurrence of a separator character, or stream exhaustion. In the former case, the separator character is pushed on the stack, and is not part of the output string. In the latter case, the entire stream contents are output, along with " { $link f } "." }
+{ $values { "seps" string } { "seq" { $or byte-array string f } } { "sep/f" "a character or " { $link f } } }
+{ $contract "Reads elements from " { $link input-stream } ". until the first occurrence of a separator, or stream exhaustion. In the former case, the separator character is pushed on the stack, and is not part of the output. In the latter case, the entire stream contents are output, along with " { $link f } "." }
$io-error ;
HELP: read-partial
-{ $values
- { "n" null }
- { "str/f" null } }
-{ $description "Reads at most " { $snippet "n" } " characters from " { $link input-stream } " and returns up to that many characters without blocking. If no characters are available, blocks until some are and returns them." } ;
+{ $values { "n" integer } { "seq" { $or byte-array string f } } }
+{ $description "Reads at most " { $snippet "n" } " elements from " { $link input-stream } " and returns them in a sequence. This word should be used instead of " { $link read } " when processing the entire element a chunk at a time, since on some stream implementations it may be slightly faster." } ;
HELP: write1
-{ $values { "ch" "a character" } }
-{ $contract "Writes a character of output to " { $link output-stream } ". If the stream does buffering, output may not be performed immediately; use " { $link flush } " to force output." }
+{ $values { "elt" "an element" } }
+{ $contract "Writes an element to " { $link output-stream } ". If the stream does buffering, output may not be performed immediately; use " { $link flush } " to force output." }
$io-error ;
HELP: write
-{ $values { "str" string } }
-{ $description "Writes a string of output to " { $link output-stream } ". If the stream does buffering, output may not be performed immediately; use " { $link flush } " to force output." }
+{ $values { "seq" { $or byte-array string f } } }
+{ $description "Writes a sequence of elements to " { $link output-stream } ". If the stream does buffering, output may not be performed immediately; use " { $link flush } " to force output." }
$io-error ;
HELP: flush
$io-error ;
HELP: print
-{ $values { "string" string } }
+{ $values { "str" string } }
{ $description "Writes a newline-terminated string to " { $link output-stream } "." }
$io-error ;
{ $values { "quot" { $quotation "( str -- )" } } }
{ $description "Calls the quotation with successive lines of text, until the current " { $link input-stream } " is exhausted." } ;
+HELP: each-block
+{ $values { "quot" { $quotation "( block -- )" } } }
+{ $description "Calls the quotation with successive blocks of data, until the current " { $link input-stream } " is exhausted." } ;
+
HELP: contents
-{ $values { "stream" "an input stream" } { "str" string } }
-{ $description "Reads the entire contents of a stream into a string." }
+{ $values { "stream" "an input stream" } { "seq" "a string, byte array or " { $link f } } }
+{ $description "Reads the entire contents of a stream. If the stream is empty, outputs" { $link f } "." }
$io-error ;
ARTICLE: "stream-protocol" "Stream protocol"
$nl
"All streams must implement the " { $link dispose } " word in addition to the stream protocol."
$nl
-"These words are required for input streams:"
+"These words are required for binary and string input streams:"
{ $subsection stream-read1 }
{ $subsection stream-read }
{ $subsection stream-read-until }
-{ $subsection stream-readln }
{ $subsection stream-read-partial }
-"These words are required for output streams:"
+"This word is only required for string input streams:"
+{ $subsection stream-readln }
+"These words are required for binary and string output streams:"
{ $subsection stream-flush }
{ $subsection stream-write1 }
{ $subsection stream-write }
+"This word is only required for string output streams:"
{ $subsection stream-nl }
+"For a discussion of the distinction between binary and string streams, see " { $link "stream-elements" } "."
{ $see-also "io.timeouts" } ;
-ARTICLE: "stdio" "Default input and output streams"
+ARTICLE: "stdio-motivation" "Motivation for default streams"
"Most I/O code only operates on one stream at a time. The " { $link input-stream } " and " { $link output-stream } " variables are implicit parameters used by many I/O words. Using this idiom improves code in three ways:"
{ $list
{ "Code becomes simpler because there is no need to keep a stream around on the stack." }
"\"data.txt\" utf8 ["
" readln number>string read 16 group"
"] with-file-reader"
-}
+} ;
+
+ARTICLE: "stdio" "Default input and output streams"
+{ $subsection "stdio-motivation" }
"The default input stream is stored in a dynamically-scoped variable:"
{ $subsection input-stream }
"Unless rebound in a child namespace, this variable will be set to a console stream for reading input from the user."
{ $subsection read1 }
{ $subsection read }
{ $subsection read-until }
-{ $subsection readln }
{ $subsection read-partial }
+"If the default input stream is a string stream (" { $link "stream-elements" } "), lines of text can be read:"
+{ $subsection readln }
"A pair of combinators for rebinding the " { $link input-stream } " variable:"
{ $subsection with-input-stream }
{ $subsection with-input-stream* }
{ $subsection flush }
{ $subsection write1 }
{ $subsection write }
+"If the default output stream is a string stream (" { $link "stream-elements" } "), lines of text can be written:"
+{ $subsection readln }
{ $subsection print }
{ $subsection nl }
{ $subsection bl }
"First, a simple composition of " { $link stream-write } " and " { $link stream-nl } ":"
{ $subsection stream-print }
"Processing lines one by one:"
-{ $subsection each-line }
-"Sluring an entire stream into memory all at once:"
{ $subsection lines }
+{ $subsection each-line }
+"Processing blocks of data:"
{ $subsection contents }
+{ $subsection each-block }
"Copying the contents of one stream to another:"
{ $subsection stream-copy } ;
+ARTICLE: "stream-elements" "Stream elements"
+"There are two types of streams:"
+{ $list
+ { { $strong "Binary streams" } " - the elements are integers between 0 and 255, inclusive; they represent bytes. Reading a sequence of elements produces a " { $link byte-array } "." }
+ { { $strong "String streams" } " - the elements are non-negative integers, representing Unicode code points. Reading a sequence of elements produces a " { $link string } "." }
+}
+"Most external streams are binary streams, and can be wrapped in string streams once a suitable encoding has been provided; see " { $link "io.encodings" } "." ;
+
ARTICLE: "streams" "Streams"
-"Input and output centers on the concept of a " { $emphasis "stream" } ", which is a source or sink of characters. Streams also support formatted output, which may be used to present styled text in a manner independent of output medium."
-$nl
-"A stream can either be passed around on the stack or bound to a dynamic variable and used as an implicit " { $emphasis "default stream" } "."
+"Input and output centers on the concept of a " { $emphasis "stream" } ", which is a source or sink of elements."
+{ $subsection "stream-elements" }
+"A stream can either be passed around on the stack or bound to a dynamic variable and used as one of the two implicit " { $emphasis "default streams" } "."
{ $subsection "stream-protocol" }
{ $subsection "stdio" }
{ $subsection "stream-utils" }
continuations destructors assocs ;
IN: io
+GENERIC: stream-read1 ( stream -- elt )
+GENERIC: stream-read ( n stream -- seq )
+GENERIC: stream-read-until ( seps stream -- seq sep/f )
+GENERIC: stream-read-partial ( n stream -- seq )
GENERIC: stream-readln ( stream -- str/f )
-GENERIC: stream-read1 ( stream -- ch/f )
-GENERIC: stream-read ( n stream -- str/f )
-GENERIC: stream-read-until ( seps stream -- str/f sep/f )
-GENERIC: stream-read-partial ( n stream -- str/f )
-GENERIC: stream-write1 ( ch stream -- )
-GENERIC: stream-write ( str stream -- )
+
+GENERIC: stream-write1 ( elt stream -- )
+GENERIC: stream-write ( seq stream -- )
GENERIC: stream-flush ( stream -- )
GENERIC: stream-nl ( stream -- )
-: stream-print ( str stream -- )
- [ stream-write ] keep stream-nl ;
-
-: (stream-copy) ( in out -- )
- 64 1024 * pick stream-read-partial
- [ over stream-write (stream-copy) ] [ 2drop ] if* ;
-
-: stream-copy ( in out -- )
- [ 2dup (stream-copy) ] [ dispose dispose ] [ ]
- cleanup ;
+: stream-print ( str stream -- ) [ stream-write ] keep stream-nl ;
! Default streams
SYMBOL: input-stream
SYMBOL: error-stream
: readln ( -- str/f ) input-stream get stream-readln ;
-: read1 ( -- ch/f ) input-stream get stream-read1 ;
-: read ( n -- str/f ) input-stream get stream-read ;
-: read-until ( seps -- str/f sep/f ) input-stream get stream-read-until ;
-: read-partial ( n -- str/f ) input-stream get stream-read-partial ;
+: read1 ( -- elt ) input-stream get stream-read1 ;
+: read ( n -- seq ) input-stream get stream-read ;
+: read-until ( seps -- seq sep/f ) input-stream get stream-read-until ;
+: read-partial ( n -- seq ) input-stream get stream-read-partial ;
-: write1 ( ch -- ) output-stream get stream-write1 ;
-: write ( str -- ) output-stream get stream-write ;
+: write1 ( elt -- ) output-stream get stream-write1 ;
+: write ( seq -- ) output-stream get stream-write ;
: flush ( -- ) output-stream get stream-flush ;
: nl ( -- ) output-stream get stream-nl ;
[ [ drop dispose dispose ] 3curry ] 3bi
[ ] cleanup ; inline
-: print ( string -- ) output-stream get stream-print ;
+: print ( str -- ) output-stream get stream-print ;
: bl ( -- ) " " write ;
: lines ( stream -- seq )
[ [ readln dup ] [ ] [ drop ] produce ] with-input-stream ;
+<PRIVATE
+
+: each-morsel ( handler: ( data -- ) reader: ( -- data ) -- )
+ [ dup ] compose swap [ drop ] while ; inline
+
+PRIVATE>
+
: each-line ( quot -- )
- [ [ readln dup ] ] dip [ drop ] while ; inline
+ [ readln ] each-morsel ; inline
-: contents ( stream -- str )
+: contents ( stream -- seq )
[
- [ 65536 read dup ] [ ] [ drop ] produce concat f like
+ [ 65536 read-partial dup ]
+ [ ] [ drop ] produce concat f like
] with-input-stream ;
+
+: each-block ( quot: ( block -- ) -- )
+ [ 8192 read-partial ] each-morsel ; inline
+
+: stream-copy ( in out -- )
+ [ [ [ write ] each-block ] with-output-stream ]
+ curry with-input-stream ;
\ No newline at end of file
"Here is an array containing the " { $link f } " class:"
{ $example "{ POSTPONE: f } ." "{ POSTPONE: f }" }
"The " { $link f } " object is an instance of the " { $link f } " class:"
-{ $example "f class ." "POSTPONE: f" }
+{ $example "USE: classes" "f class ." "POSTPONE: f" }
"The " { $link f } " class is an instance of " { $link word } ":"
-{ $example "\\ f class ." "word" }
+{ $example "USE: classes" "\\ f class ." "word" }
"On the other hand, " { $link t } " is just a word, and there is no class which it is a unique instance of."
{ $example "t \\ t eq? ." "t" }
"Many words which search collections confuse the case of no element being present with an element being found equal to " { $link f } ". If this distinction is imporant, there is usually an alternative word which can be used; for example, compare " { $link at } " with " { $link at* } "." ;
{ $see-also "booleans" "bitwise-arithmetic" both? either? } ;
ARTICLE: "equality" "Equality"
-"There are two distinct notions of ``sameness'' when it comes to objects."
+"There are two distinct notions of “sameness†when it comes to objects."
$nl
"You can test if two references point to the same object (" { $emphasis "identity comparison" } "). This is rarely used; it is mostly useful with large, mutable objects where the object identity matters but the value is transient:"
{ $subsection eq? }
! Copyright (C) 2004, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel.private slots.private classes.tuple.private ;
+USING: kernel.private slots.private math.private
+classes.tuple.private ;
IN: kernel
DEFER: dip
M: identity-tuple equal? 2drop f ;
-USE: math.private
: = ( obj1 obj2 -- ? )
2dup eq? [ 2drop t ] [
2dup both-fixnums? [ 2drop f ] [ equal? ] if
ARTICLE: "integers" "Integers"
{ $subsection integer }
"Integers come in two varieties -- fixnums and bignums. Fixnums fit in a machine word and are faster to manipulate; if the result of a fixnum operation is too large to fit in a fixnum, the result is upgraded to a bignum. Here is an example where two fixnums are multiplied yielding a bignum:"
-{ $example "134217728 class ." "fixnum" }
-{ $example "128 class ." "fixnum" }
+{ $example "USE: classes" "134217728 class ." "fixnum" }
+{ $example "USE: classes" "128 class ." "fixnum" }
{ $example "134217728 128 * ." "17179869184" }
-{ $example "134217728 128 * class ." "bignum" }
+{ $example "USE: classes" "1 128 shift class ." "bignum" }
"Integers can be entered using a different base; see " { $link "syntax-numbers" } "."
$nl
"Integers can be tested for, and real numbers can be converted to integers:"
HELP: shift
{ $values { "x" integer } { "n" integer } { "y" integer } }
-{ $description "Shifts " { $snippet "x" } " to the left by " { $snippet "n" } " bits if " { $snippet "n" } " is positive, or " { $snippet "-n" } " bits to the right if " { $snippet "n" } " is negative. A left shift of a fixnum may overflow, yielding a bignum. A right shift may result in bits ``falling off'' the right hand side and being discarded." }
+{ $description "Shifts " { $snippet "x" } " to the left by " { $snippet "n" } " bits if " { $snippet "n" } " is positive, or " { $snippet "-n" } " bits to the right if " { $snippet "n" } " is negative. A left shift of a fixnum may overflow, yielding a bignum. A right shift may result in bits “falling off†the right hand side and being discarded." }
{ $examples { $example "USING: math prettyprint ;" "BIN: 101 5 shift .b" "10100000" } { $example "USING: math prettyprint ;" "BIN: 11111 -2 shift .b" "111" } } ;
HELP: bitnot
"Math operations obey certain numerical upgrade rules. If one of the inputs is a bignum and the other is a fixnum, the latter is first coerced to a bignum; if one of the inputs is a float, the other is coerced to a float."
$nl
"Two examples where you should note the types of the inputs and outputs:"
-{ $example "3 >fixnum 6 >bignum * class ." "bignum" }
-{ $example "1/2 2.0 + ." "4.5" }
+{ $example "USE: classes" "3 >fixnum 6 >bignum * class ." "bignum" }
+{ $example "1/2 2.0 + ." "2.5" }
"The following usual operations are supported by all numbers."
{ $subsection + }
{ $subsection - }
IN: memory
HELP: begin-scan ( -- )
-{ $description "Moves all objects to tenured space, disables the garbage collector, and resets the heap scan pointer to point at the first object in the heap. The " { $link next-object } " word can then be called to advance the heap scan pointer and return successive objects."
+{ $description "Disables the garbage collector and resets the heap scan pointer to point at the first object in the heap. The " { $link next-object } " word can then be called to advance the heap scan pointer and return successive objects."
$nl
"This word must always be paired with a call to " { $link end-scan } "." }
{ $notes "This is a low-level facility and can be dangerous. Use the " { $link each-object } " combinator instead." } ;
] [ 2drop ] if ; inline recursive
: each-object ( quot -- )
- begin-scan [ (each-object) ] [ end-scan ] [ ] cleanup ; inline
+ gc begin-scan [ (each-object) ] [ end-scan ] [ ] cleanup ; inline
: count-instances ( quot -- n )
0 swap [ 1 0 ? + ] compose each-object ; inline
{ $subsection parse-definition }
"The " { $link POSTPONE: ; } " word is just a delimiter; an unpaired occurrence throws a parse error:"
{ $see POSTPONE: ; }
-"There are additional parsing words whose syntax is delimited by " { $link POSTPONE: ; } ", and they are all implemented by calling " { $link parse-definition } "." ;
+"There are additional parsing words whose syntax is delimited by " { $link POSTPONE: ; } ", and they are all implemented by calling " { $link parse-definition } "." ;
ARTICLE: "parsing-tokens" "Parsing raw tokens"
"So far we have seen how to read individual tokens, or read a sequence of parsed objects until a delimiter. It is also possible to read raw tokens from the input and perform custom processing."
dup vocabulary>>
[ (use+) ]
[ amended-use get dup [ push ] [ 2drop ] if ]
- [ "Added ``" "'' vocabulary to search path" surround note. ]
+ [ "Added \"" "\" vocabulary to search path" surround note. ]
tri
] [ create-in ] if ;
"definitions"
"editors"
"help"
+ "help.lint"
"inspector"
"io"
"io.files"
SYMBOL: print-use-hook
print-use-hook global [ [ ] or ] change-at
-!
+
: parse-fresh ( lines -- quot )
[
V{ } clone amended-use set
[
[
lines dup parse-fresh
- tuck finish-parsing
+ [ nip ] [ finish-parsing ] 2bi
forget-smudged
] with-source-file
] with-compilation-unit ;
HELP: replicate
{ $values
- { "seq" sequence } { "quot" quotation }
+ { "seq" sequence } { "quot" { $quotation "( -- elt )" } }
{ "newseq" sequence } }
{ $description "Calls the quotation for every element of the sequence in order. However, the element is not passed to the quotation -- it is dropped, and the quotation produces an element of its own that is collected into a sequence of the same class as the input sequence." }
{ $examples
: from-end ( seq n -- seq n' ) [ dup length ] dip - ; inline
: (2sequence) ( obj1 obj2 seq -- seq )
- tuck 1 swap set-nth-unsafe
- tuck 0 swap set-nth-unsafe ; inline
+ [ 1 swap set-nth-unsafe ] keep
+ [ 0 swap set-nth-unsafe ] keep ; inline
: (3sequence) ( obj1 obj2 obj3 seq -- seq )
- tuck 2 swap set-nth-unsafe
+ [ 2 swap set-nth-unsafe ] keep
(2sequence) ; inline
: (4sequence) ( obj1 obj2 obj3 obj4 seq -- seq )
- tuck 3 swap set-nth-unsafe
+ [ 3 swap set-nth-unsafe ] keep
(3sequence) ; inline
PRIVATE>
2dup shorter? [
2drop f
] [
- tuck length head-slice sequence=
+ [ nip ] [ length head-slice ] 2bi sequence=
] if ;
: tail? ( seq end -- ? )
2dup shorter? [
2drop f
] [
- tuck length tail-slice* sequence=
+ [ nip ] [ length tail-slice* ] 2bi sequence=
] if ;
: cut-slice ( seq n -- before-slice after-slice )
{ $subsection initial-value } ;
ARTICLE: "slots" "Slots"
-"A " { $emphasis "slot" } " is a component of an object which can store a value."
+"The " { $vocab-link "slots" } " vocabulary contains words for introspecting the slots of an object. A " { $emphasis "slot" } " is a component of an object which can store a value."
$nl
{ $link "tuples" } " are composed entirely of slots, and instances of " { $link "builtin-classes" } " consist of slots together with intrinsic data."
-"The " { $vocab-link "slots" } " vocabulary contains words for introspecting the slots of an object."
$nl
"The " { $snippet "\"slots\"" } " word property of built-in and tuple classes holds an array of " { $emphasis "slot specifiers" } " describing the slot layout of each instance."
{ $subsection slot-spec }
-! Copyright (C) 2005, 2008 Slava Pestov.
+! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays byte-arrays kernel kernel.private math namespaces
make sequences strings words effects generic generic.standard
classes classes.algebra slots.private combinators accessors
-words sequences.private assocs alien quotations ;
+words sequences.private assocs alien quotations hashtables ;
IN: slots
TUPLE: slot-spec name offset class initial read-only ;
] [ ] make ;
: writer-props ( slot-spec -- assoc )
- [ "writing" set ] H{ } make-assoc ;
+ "writing" associate ;
: define-writer ( class slot-spec -- )
[ name>> writer-word ] [ writer-quot ] [ writer-props ] tri
{ $subsection 1string }
"Since strings are sequences, basic string manipulation can be performed using sequence operations (" { $link "sequences" } "). More advanced functionality can be found in other vocabularies, including but not limited to:"
{ $list
- { { $vocab-link "ascii" } " - traditional ASCII character classes" }
- { { $vocab-link "unicode.categories" } " - Unicode character classes" }
- { { $vocab-link "unicode.case" } " - Unicode case conversion" }
+ { { $link "ascii" } " - ASCII algorithms for interoperability with legacy applications" }
+ { { $link "unicode" } " - Unicode algorithms for modern multilingual applications" }
{ { $vocab-link "regexp" } " - regular expressions" }
{ { $vocab-link "peg" } " - parser expression grammars" }
} ;
IN: syntax
ARTICLE: "parser-algorithm" "Parser algorithm"
-"At the most abstract level, Factor syntax consists of whitespace-separated tokens. The parser tokenizes the input on whitespace boundaries. The parser is case-sensitive and whitespace between tokens is significant, so the following three expressions tokenize differently:"
+"At the most abstract level, Factor syntax consists of whitespace-separated tokens. The parser tokenizes the input on whitespace boundaries. The parser is case-sensitive and whitespace between tokens is significant, so the following three expressions tokenize differently:"
{ $code "2X+\n2 X +\n2 x +" }
"As the parser reads tokens it makes a distinction between numbers, ordinary words, and parsing words. Tokens are appended to the parse tree, the top level of which is a quotation returned by the original parser invocation. Nested levels of the parse tree are created by parsing words."
$nl
"More information on floats can be found in " { $link "floats" } "." ;
ARTICLE: "syntax-complex-numbers" "Complex number syntax"
-"A complex number is given by two components, a ``real'' part and ''imaginary'' part. The components must either be integers, ratios or floats."
+"A complex number is given by two components, a “real†part and “imaginary†part. The components must either be integers, ratios or floats."
{ $code
"C{ 1/2 1/3 } ! the complex number 1/2+1/3i"
"C{ 0 1 } ! the imaginary unit"
ARTICLE: "syntax-literals" "Literals"
"Many different types of objects can be constructed at parse time via literal syntax. Numbers are a special case since support for reading them is built-in to the parser. All other literals are constructed via parsing words."
$nl
-"If a quotation contains a literal object, the same literal object instance is used each time the quotation executes; that is, literals are ``live''."
+"If a quotation contains a literal object, the same literal object instance is used each time the quotation executes; that is, literals are “liveâ€."
$nl
"Using mutable object literals in word definitions requires care, since if those objects are mutated, the actual word definition will be changed, which is in most cases not what you would expect. Literals should be " { $link clone } "d before being passed to word which may potentially mutate them."
{ $subsection "syntax-numbers" }
{ $description "Creates a new symbol for every token until the " { $snippet ";" } "." }
{ $examples { $example "USING: prettyprint ;" "IN: scratchpad" "SYMBOLS: foo bar baz ;\nfoo . bar . baz ." "foo\nbar\nbaz" } } ;
+HELP: SINGLETON:
+{ $syntax "SINGLETON: class" }
+{ $values
+ { "class" "a new singleton to define" }
+}
+{ $description
+ "Defines a new singleton class. The class word itself is the sole instance of the singleton class."
+}
+{ $examples
+ { $example "USING: classes.singleton kernel io ;" "IN: singleton-demo" "USE: prettyprint SINGLETON: foo\nGENERIC: bar ( obj -- )\nM: foo bar drop \"a foo!\" print ;\nfoo bar" "a foo!" }
+} ;
+
HELP: SINGLETONS:
{ $syntax "SINGLETONS: words... ;" }
{ $values { "words" "a sequence of new words to define" } }
main help
source-loaded? docs-loaded? ;
-! sources-loaded? slot is one of these two
+! sources-loaded? slot is one of these three
SYMBOL: +parsing+
SYMBOL: +running+
SYMBOL: +done+
! Copyright (C) 2008 Jeff Bigot\r
! See http://factorcode.org/license.txt for BSD license.\r
-USING: adsoda\r
-xml\r
-xml.utilities\r
-accessors\r
-combinators\r
-sequences\r
-math.parser\r
-kernel\r
-splitting\r
-values\r
-continuations\r
-;\r
+USING: adsoda xml xml.utilities xml.dispatch accessors combinators\r
+sequences math.parser kernel splitting values continuations ;\r
IN: 4DNav.space-file-decoder\r
\r
: decode-number-array ( x -- y ) "," split [ string>number ] map ;\r
! Copyright (C) 2008, 2009 Jose Antonio Ortega Ruiz.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs compiler.units definitions fuel.eval
-fuel.help help.markup help.topics io.pathnames kernel math math.order
-memoize namespaces parser sequences sets sorting tools.crossref
-tools.scaffold tools.vocabs vocabs vocabs.loader vocabs.parser words ;
+USING: assocs compiler.units fuel.eval fuel.help fuel.remote fuel.xref
+help.topics io.pathnames kernel namespaces parser sequences
+tools.scaffold vocabs.loader ;
IN: fuel
! Edit locations
-<PRIVATE
-
-: fuel-normalize-loc ( seq -- path line )
- [ dup length 0 > [ first (normalize-path) ] [ drop f ] if ]
- [ dup length 1 > [ second ] [ drop 1 ] if ] bi ;
-
-: fuel-get-loc ( object -- )
- fuel-normalize-loc 2array fuel-eval-set-result ;
-
-PRIVATE>
-
-: fuel-get-edit-location ( word -- ) where fuel-get-loc ; inline
+: fuel-get-word-location ( word -- )
+ word-location fuel-eval-set-result ;
: fuel-get-vocab-location ( vocab -- )
- >vocab-link fuel-get-edit-location ; inline
-
-: fuel-get-doc-location ( word -- ) props>> "help-loc" swap at fuel-get-loc ;
-
-: fuel-get-article-location ( name -- ) article loc>> fuel-get-loc ;
-
-! Cross-references
-
-<PRIVATE
-
-: fuel-word>xref ( word -- xref )
- [ name>> ] [ vocabulary>> ] [ where fuel-normalize-loc ] tri 4array ;
-
-: fuel-sort-xrefs ( seq -- seq' )
- [ [ first ] dip first <=> ] sort ; inline
-
-: fuel-format-xrefs ( seq -- seq' )
- [ word? ] filter [ fuel-word>xref ] map ; inline
-
-: (fuel-index) ( seq -- seq )
- [ [ >link name>> ] [ article-title ] bi 2array \ $subsection prefix ] map ;
-
-PRIVATE>
-
-: fuel-callers-xref ( word -- )
- usage fuel-format-xrefs fuel-sort-xrefs fuel-eval-set-result ; inline
-
-: fuel-callees-xref ( word -- )
- uses fuel-format-xrefs fuel-sort-xrefs fuel-eval-set-result ; inline
-
-: fuel-apropos-xref ( str -- )
- words-matching fuel-format-xrefs fuel-eval-set-result ; inline
-
-: fuel-vocab-xref ( vocab -- )
- words fuel-format-xrefs fuel-eval-set-result ; inline
+ vocab-location fuel-eval-set-result ;
-: fuel-index ( quot: ( -- seq ) -- )
- call (fuel-index) fuel-eval-set-result ; inline
+: fuel-get-doc-location ( word -- )
+ doc-location fuel-eval-set-result ;
-! Completion support
+: fuel-get-article-location ( name -- )
+ article-location fuel-eval-set-result ;
-<PRIVATE
-
-: fuel-filter-prefix ( seq prefix -- seq )
- [ drop-prefix nip length 0 = ] curry filter prune ; inline
+: fuel-get-vocabs ( -- )
+ get-vocabs fuel-eval-set-result ;
-: (fuel-get-vocabs) ( -- seq )
- all-vocabs-seq [ vocab-name ] map ; inline
+: fuel-get-vocabs/prefix ( prefix -- )
+ get-vocabs/prefix fuel-eval-set-result ;
-MEMO: (fuel-vocab-words) ( name -- seq )
- >vocab-link words [ name>> ] map ;
+: fuel-get-words ( prefix names -- )
+ get-vocabs-words/prefix fuel-eval-set-result ;
-: fuel-current-words ( -- seq )
- use get [ keys ] map concat ; inline
+! Cross-references
-: fuel-vocabs-words ( names -- seq )
- prune [ (fuel-vocab-words) ] map concat ; inline
+: fuel-callers-xref ( word -- ) callers-xref fuel-eval-set-result ;
-: (fuel-get-words) ( prefix names/f -- seq )
- [ fuel-vocabs-words ] [ fuel-current-words ] if* natural-sort
- swap fuel-filter-prefix ;
+: fuel-callees-xref ( word -- ) callees-xref fuel-eval-set-result ;
-PRIVATE>
+: fuel-apropos-xref ( str -- ) apropos-xref fuel-eval-set-result ;
-: fuel-get-vocabs ( -- )
- (fuel-get-vocabs) fuel-eval-set-result ;
+: fuel-vocab-xref ( vocab -- ) vocab-xref fuel-eval-set-result ;
-: fuel-get-vocabs/prefix ( prefix -- )
- (fuel-get-vocabs) swap fuel-filter-prefix fuel-eval-set-result ;
+: fuel-vocab-uses-xref ( vocab -- ) vocab-uses-xref fuel-eval-set-result ;
-: fuel-get-words ( prefix names -- )
- (fuel-get-words) fuel-eval-set-result ;
+: fuel-vocab-usage-xref ( vocab -- ) vocab-usage-xref fuel-eval-set-result ;
! Help support
: fuel-vocab-summary ( name -- )
(fuel-vocab-summary) fuel-eval-set-result ;
+: fuel-index ( quot -- ) call format-index fuel-eval-set-result ;
+
: fuel-get-vocabs/tag ( tag -- )
(fuel-get-vocabs/tag) fuel-eval-set-result ;
: fuel-scaffold-get-root ( name -- ) find-vocab-root fuel-eval-set-result ;
+! Remote connection
+
+MAIN: fuel-start-remote-listener*
MEMO: (fuel-get-vocabs/tag) ( tag -- element )
[ "Vocabularies tagged " prepend \ $heading swap 2array ]
[ tagged fuel-vocab-list ] bi 2array ;
+
+: format-index ( seq -- seq )
+ [ [ >link name>> ] [ article-title ] bi 2array \ $subsection prefix ] map ;
--- /dev/null
+Jose Antonio Ortega Ruiz
\ No newline at end of file
--- /dev/null
+! Copyright (C) 2009 Jose Antonio Ortega Ruiz.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors debugger io io.encodings.utf8 io.servers.connection
+kernel listener math namespaces ;
+
+IN: fuel.remote
+
+<PRIVATE
+
+: start-listener ( -- )
+ [ [ print-error-and-restarts ] error-hook set listener ] with-scope ;
+
+: server ( port -- server )
+ <threaded-server>
+ "tty-server" >>name
+ utf8 >>encoding
+ swap local-server >>insecure
+ [ start-listener ] >>handler
+ f >>timeout ;
+
+: print-banner ( -- )
+ "Starting server. Connect with 'M-x connect-to-factor' in Emacs"
+ write nl flush ;
+
+PRIVATE>
+
+: fuel-start-remote-listener ( port/f -- )
+ print-banner integer? [ 9000 ] unless* server start-server ;
+
+: fuel-start-remote-listener* ( -- ) f fuel-start-remote-listener ;
+
--- /dev/null
+Jose Antonio Ortega Ruiz
\ No newline at end of file
--- /dev/null
+! Copyright (C) 2009 Jose Antonio Ortega Ruiz.
+! See http://factorcode.org/license.txt for BSD license.
+
+USING: accessors arrays assocs definitions help.topics io.pathnames
+kernel math math.order memoize namespaces sequences sets sorting
+tools.crossref tools.vocabs vocabs vocabs.parser words ;
+
+IN: fuel.xref
+
+<PRIVATE
+
+: normalize-loc ( seq -- path line )
+ [ dup length 0 > [ first (normalize-path) ] [ drop f ] if ]
+ [ dup length 1 > [ second ] [ drop 1 ] if ] bi ;
+
+: get-loc ( object -- loc ) normalize-loc 2array ;
+
+: word>xref ( word -- xref )
+ [ name>> ] [ vocabulary>> ] [ where normalize-loc ] tri 4array ;
+
+: vocab>xref ( vocab -- xref )
+ dup dup >vocab-link where normalize-loc 4array ;
+
+: sort-xrefs ( seq -- seq' )
+ [ [ first ] dip first <=> ] sort ; inline
+
+: format-xrefs ( seq -- seq' )
+ [ word? ] filter [ word>xref ] map ; inline
+
+: filter-prefix ( seq prefix -- seq )
+ [ drop-prefix nip length 0 = ] curry filter prune ; inline
+
+MEMO: (vocab-words) ( name -- seq )
+ >vocab-link words [ name>> ] map ;
+
+: current-words ( -- seq )
+ use get [ keys ] map concat ; inline
+
+: vocabs-words ( names -- seq )
+ prune [ (vocab-words) ] map concat ; inline
+
+PRIVATE>
+
+: callers-xref ( word -- seq ) usage format-xrefs sort-xrefs ;
+
+: callees-xref ( word -- seq ) uses format-xrefs sort-xrefs ;
+
+: apropos-xref ( str -- seq ) words-matching format-xrefs ;
+
+: vocab-xref ( vocab -- seq ) words format-xrefs ;
+
+: word-location ( word -- loc ) where get-loc ;
+
+: vocab-location ( vocab -- loc ) >vocab-link where get-loc ;
+
+: vocab-uses-xref ( vocab -- seq ) vocab-uses [ vocab>xref ] map ;
+
+: vocab-usage-xref ( vocab -- seq ) vocab-usage [ vocab>xref ] map ;
+
+: doc-location ( word -- loc ) props>> "help-loc" swap at get-loc ;
+
+: article-location ( name -- loc ) article loc>> get-loc ;
+
+: get-vocabs ( -- seq ) all-vocabs-seq [ vocab-name ] map ;
+
+: get-vocabs/prefix ( prefix -- seq ) get-vocabs swap filter-prefix ;
+
+: get-vocabs-words/prefix ( prefix names/f -- seq )
+ [ vocabs-words ] [ current-words ] if* natural-sort swap filter-prefix ;
+++ /dev/null
-
-USING: accessors combinators.cleave combinators.short-circuit
-concurrency.combinators destructors fry io io.directories
-io.encodings io.encodings.utf8 io.launcher io.monitors
-io.pathnames io.pipes io.ports kernel locals math namespaces
-sequences splitting strings threads ui ui.gadgets
-ui.gadgets.buttons ui.gadgets.editors ui.gadgets.labels
-ui.gadgets.packs ui.gadgets.tracks ;
-
-IN: git-tool
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: head** ( seq obj -- seq/f ) dup number? [ head ] [ dupd find drop head ] if ;
-
-: tail** ( seq obj -- seq/f )
- dup number?
- [ tail ]
- [ dupd find drop [ tail ] [ drop f ] if* ]
- if ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: <process-stdout-stderr-reader> ( DESC -- process stream stream )
- [
- [let | STDOUT-PIPE [ (pipe) |dispose ]
- STDERR-PIPE [ (pipe) |dispose ] |
-
- [let | PROCESS [ DESC >process ] |
-
- PROCESS
- [ STDOUT-PIPE out>> or ] change-stdout
- [ STDERR-PIPE out>> or ] change-stderr
- run-detached
-
- STDOUT-PIPE out>> dispose
- STDERR-PIPE out>> dispose
-
- STDOUT-PIPE in>> <input-port> utf8 <decoder>
- STDERR-PIPE in>> <input-port> utf8 <decoder> ] ]
- ]
- with-destructors ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: run-process/result ( desc -- process )
- <process-stdout-stderr-reader>
- {
- [ contents [ string-lines ] [ f ] if* ]
- [ contents [ string-lines ] [ f ] if* ]
- }
- parallel-spread
- [ >>stdout ] [ >>stderr ] bi*
- dup wait-for-process >>status ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-! process popup windows
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: popup-window ( title contents -- )
- dup string? [ ] [ "\n" join ] if
- <editor> tuck set-editor-string swap open-window ;
-
-: popup-process-window ( process -- )
- [ stdout>> [ "output" swap popup-window ] when* ]
- [ stderr>> [ "error" swap popup-window ] when* ]
- [
- [ stdout>> ] [ stderr>> ] bi or not
- [ "Process" "NO OUTPUT" popup-window ]
- when
- ]
- tri ;
-
-: popup-if-error ( process -- )
- { [ status>> 0 = not ] [ popup-process-window t ] } 1&& drop ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: git-process ( REPO DESC -- process )
- REPO [ DESC run-process/result ] with-directory ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: git-status-section ( lines section -- lines/f )
- '[ _ = ] tail**
- [
- [ "#\t" head? ] tail**
- [ "#\t" head? not ] head**
- [ 2 tail ] map
- ]
- [ f ]
- if* ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: colon ( -- ch ) CHAR: : ;
-: space ( -- ch ) 32 ;
-
-: git-status-line-file ( line -- file )
- { [ colon = ] 1 [ space = not ] } [ tail** ] each ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-TUPLE: <git-status>
- repository
- to-commit-new
- to-commit-modified
- to-commit-deleted
- modified
- deleted
- untracked ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: check-empty ( seq -- seq/f ) dup empty? [ drop f ] when ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: refresh-git-status ( STATUS -- STATUS )
-
- [let | LINES [ STATUS repository>> { "git" "status" } git-process stdout>> ] |
-
- STATUS
-
- LINES "# Changes to be committed:" git-status-section
- [ "new file:" head? ] filter
- [ git-status-line-file ] map
- check-empty
- >>to-commit-new
-
- LINES "# Changes to be committed:" git-status-section
- [ "modified:" head? ] filter
- [ git-status-line-file ] map
- check-empty
- >>to-commit-modified
-
- LINES "# Changes to be committed:" git-status-section
- [ "deleted:" head? ] filter
- [ git-status-line-file ] map
- check-empty
- >>to-commit-deleted
-
- LINES "# Changed but not updated:" git-status-section
- [ "modified:" head? ] filter
- [ git-status-line-file ] map
- check-empty
- >>modified
-
- LINES "# Changed but not updated:" git-status-section
- [ "deleted:" head? ] filter
- [ git-status-line-file ] map
- check-empty
- >>deleted
-
- LINES "# Untracked files:" git-status-section >>untracked ] ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: git-status ( REPO -- <git-status> )
-
- <git-status> new REPO >>repository refresh-git-status ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: factor-git-status ( -- <git-status> ) "resource:" git-status ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-! git-tool
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: to-commit ( <git-status> -- seq )
- { to-commit-new>> to-commit-modified>> to-commit-deleted>> } 1arr concat ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: refresh-status-pile ( STATUS PILE -- )
-
- STATUS refresh-git-status drop
-
- PILE clear-gadget
-
- PILE
-
- ! Commit section
-
- [wlet | add-commit-path-button [| TEXT PATH |
-
- { 1 0 } <track>
-
- TEXT <label> 2/8 track-add
- PATH <label> 6/8 track-add
-
- "Reset"
- [
- drop
-
- STATUS repository>>
- { "git" "reset" "HEAD" PATH }
- git-process
- drop
-
- STATUS PILE refresh-status-pile
- ]
- <bevel-button> f track-add
-
- add-gadget ] |
-
- STATUS to-commit
- [
- "Changes to be committed" <label> reverse-video-theme add-gadget
-
- STATUS to-commit-new>>
- [| PATH | "new file: " PATH add-commit-path-button ]
- each
-
- STATUS to-commit-modified>>
- [| PATH | "modified: " PATH add-commit-path-button ]
- each
-
- STATUS to-commit-deleted>>
- [| PATH | "deleted: " PATH add-commit-path-button ]
- each
-
- <pile> 1 >>fill
-
- [let | EDITOR [ <editor> "COMMIT MESSAGE" over set-editor-string ] |
-
- EDITOR add-gadget
-
- "Commit"
- [
- drop
- [let | MSG [ EDITOR editor-string ] |
-
- STATUS repository>>
- { "git" "commit" "-m" MSG } git-process
- popup-if-error ]
- STATUS PILE refresh-status-pile
- ]
- <bevel-button>
- add-gadget ]
-
- add-gadget
-
- ]
- when ]
-
- ! Modified section
-
- STATUS modified>>
- [
- "Modified but not updated" <label> reverse-video-theme add-gadget
-
- STATUS modified>>
- [| PATH |
-
- <shelf>
-
- PATH <label> add-gadget
-
- "Add"
- [
- drop
- STATUS repository>> { "git" "add" PATH } git-process popup-if-error
- STATUS PILE refresh-status-pile
- ]
- <bevel-button> add-gadget
-
- "Diff"
- [
- drop
- STATUS repository>> { "git" "diff" PATH } git-process
- popup-process-window
- ]
- <bevel-button> add-gadget
-
- add-gadget
-
- ]
- each
-
- ]
- when
-
- ! Untracked section
-
- STATUS untracked>>
- [
- "Untracked files" <label> reverse-video-theme add-gadget
-
- STATUS untracked>>
- [| PATH |
-
- { 1 0 } <track>
-
- PATH <label> f track-add
-
- "Add"
- [
- drop
- STATUS repository>> { "git" "add" PATH } git-process popup-if-error
- STATUS PILE refresh-status-pile
- ]
- <bevel-button> f track-add
-
- add-gadget
-
- ]
- each
-
- ]
- when
-
- ! Refresh button
-
- "Refresh" [ drop STATUS PILE refresh-status-pile ] <bevel-button> add-gadget
-
- drop ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: git-remote-branches ( REPO NAME -- seq )
- REPO { "git" "remote" "show" NAME } git-process stdout>>
- " Tracked remote branches" over index 1 + tail first " " split
- [ empty? not ] filter ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: refresh-remotes-pile ( REPO PILE -- )
-
- PILE clear-gadget
-
- PILE
-
- "Remotes" <label> reverse-video-theme add-gadget
-
- REPO { "git" "remote" } git-process stdout>> [ empty? not ] filter
-
- [| NAME |
-
- [let | BRANCH! [ "master" ] |
-
- { 1 0 } <track>
-
- NAME <label> 1 track-add
-
- [let | BRANCH-BUTTON [ "master" [ drop ] <bevel-button> ] |
-
- BRANCH-BUTTON
- [
- drop
-
- <pile>
-
- 1 >>fill
-
- REPO NAME git-remote-branches
- [| OTHER-BRANCH |
- OTHER-BRANCH
- [
- drop
-
- OTHER-BRANCH BRANCH!
-
- OTHER-BRANCH BRANCH-BUTTON gadget-child set-label-string
-
- ]
- <bevel-button>
- add-gadget
- ]
- each
-
- "Select a branch" open-window
- ]
- >>quot
-
- 1 track-add ]
-
- "Fetch"
- [ drop REPO { "git" "fetch" NAME } git-process popup-process-window ]
- <bevel-button>
- 1 track-add
-
- "..remote/branch"
- [
- drop
- [let | ARG [ { ".." NAME "/" BRANCH } concat ] |
- REPO { "git" "log" ARG } git-process popup-process-window ]
- ]
- <bevel-button>
- 1 track-add
-
- "Merge"
- [
- drop
- [let | ARG [ { NAME "/" BRANCH } concat ] |
- REPO { "git" "merge" ARG } git-process popup-process-window ]
- ]
- <bevel-button>
- 1 track-add
-
- "remote/branch.."
- [
- drop
- [let | ARG [ { NAME "/" BRANCH ".." } concat ] |
- REPO { "git" "log" ARG } git-process popup-process-window ]
- ]
- <bevel-button>
- 1 track-add
-
- "Push"
- [
- drop
- REPO { "git" "push" NAME "master" } git-process popup-process-window
- ]
- <bevel-button>
- 1 track-add
-
- add-gadget ]
-
- ]
- each
-
- drop ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: git-tool ( REPO -- )
-
- <pile> 1 >>fill
-
- "Repository: " REPO [ current-directory get ] with-directory append
- <label>
- add-gadget
-
- [let | STATUS [ REPO git-status ]
- PILE [ <pile> 1 >>fill ] |
-
- [
- [
- [let | MONITOR [ REPO t <monitor> ] |
- [
- [let | PATH [ MONITOR next-change drop ] |
- ".git" PATH subseq? ! Ignore git internal operations
- [ ]
- [ STATUS PILE refresh-status-pile ]
- if
- t ]
- ]
- loop
- ]
- ]
- with-monitors
- ]
- in-thread
-
- STATUS PILE refresh-status-pile
-
- PILE add-gadget ]
-
- REPO <pile> 1 >>fill tuck refresh-remotes-pile add-gadget
-
- "Git" open-window ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: factor-git-tool ( -- ) "resource:" git-tool ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
+++ /dev/null
-
-USING: accessors calendar git-tool git-tool io.directories
-io.monitors io.pathnames kernel locals math namespaces
-sequences splitting system threads ui ui.gadgets
-ui.gadgets.buttons ui.gadgets.labels ui.gadgets.packs ;
-
-USING: git-tool ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-IN: git-tool.remote
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-TUPLE: <git-remote-gadget> < pack
- repository
- branch
- remote
- remote-branch
- fetch-period
- push
- closed
- last-refresh ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: current-branch ( REPO -- branch )
- { "git" "branch" } git-process stdout>> [ "* " head? ] find nip 2 tail ;
-
-: list-branches ( REPO -- branches )
- { "git" "branch" } git-process stdout>>
- [ empty? not ] filter
- [ 2 tail ] map ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: list-remotes ( REPO -- remotes )
- { "git" "remote" } git-process stdout>> [ empty? not ] filter ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: list-remote-branches ( REPO REMOTE -- branches )
- [let | OUT [ REPO { "git" "remote" "show" REMOTE } git-process stdout>> ] |
-
- " Tracked remote branches" OUT member?
- [
- OUT
- " Tracked remote branches" OUT index 1 + tail first " " split
- [ empty? not ] filter
- ]
- [
- OUT
- OUT [ " New remote branches" head? ] find drop
- 1 + tail first " " split
- [ empty? not ] filter
- ]
- if ] ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: refresh-git-remote-gadget ( GADGET -- )
-
- [let | REPO [ GADGET repository>> ] |
-
- GADGET clear-gadget
-
- GADGET
-
- ! Repository label
-
- "Repository: " REPO [ current-directory get ] with-directory append
- <label>
- add-gadget
-
- ! Branch button
-
- <shelf>
-
- "Branch: " <label> add-gadget
-
- REPO current-branch
- [
- drop
-
- <pile>
- REPO list-branches
-
- [| BRANCH |
-
- BRANCH
- [
- drop
- REPO { "git" "checkout" BRANCH } git-process popup-if-error
- GADGET refresh-git-remote-gadget
- ]
- <bevel-button> add-gadget
-
- ]
- each
-
- "Select a branch" open-window
-
- ]
- <bevel-button> add-gadget
-
- add-gadget
-
- ! Remote button
-
- <shelf>
-
- "Remote: " <label> add-gadget
-
- GADGET remote>>
- [
- drop
-
- <pile>
-
- REPO list-remotes
-
- [| REMOTE |
-
- REMOTE
- [
- drop
- GADGET REMOTE >>remote drop
- GADGET "master" >>remote-branch drop
- GADGET refresh-git-remote-gadget
- ]
- <bevel-button> add-gadget
-
- ]
- each
-
- "Select a remote" open-window
-
- ]
- <bevel-button> add-gadget
-
- add-gadget
-
- ! Remote branch button
-
- <shelf>
-
- "Remote branch: " <label> add-gadget
-
- GADGET remote-branch>>
- [
- drop
-
- <pile>
-
- REPO GADGET remote>> list-remote-branches
-
- [| REMOTE-BRANCH |
-
- REMOTE-BRANCH
- [
- drop
- GADGET REMOTE-BRANCH >>remote-branch drop
- GADGET refresh-git-remote-gadget
- ]
- <bevel-button> add-gadget
- ]
-
- each
-
- "Select a remote branch" open-window
-
- ]
- <bevel-button> add-gadget
-
- add-gadget
-
- ! Fetch button
-
- "Fetch"
- [
- drop
- [let | REMOTE [ GADGET remote>> ] |
- REPO { "git" "fetch" REMOTE } git-process popup-if-error ]
-
- GADGET refresh-git-remote-gadget
- ]
- <bevel-button> add-gadget
-
- ! Available changes
-
- [let | REMOTE [ GADGET remote>> ]
- REMOTE-BRANCH [ GADGET remote-branch>> ] |
-
- [let | ARG [ { ".." REMOTE "/" REMOTE-BRANCH } concat ] |
-
- [let | PROCESS [ REPO { "git" "log" ARG } git-process ] |
-
- PROCESS stdout>>
- [
- <shelf>
-
- "Changes available:" <label> add-gadget
-
- "View"
- [
- drop
- PROCESS popup-process-window
- ]
- <bevel-button> add-gadget
-
- "Merge"
- [
- drop
-
- [let | ARG [ { REMOTE "/" REMOTE-BRANCH } concat ] |
-
- REPO { "git" "merge" ARG } git-process popup-process-window
-
- ]
-
- GADGET refresh-git-remote-gadget
-
- ]
- <bevel-button> add-gadget
-
- add-gadget
-
- ]
- when
-
- ] ] ]
-
-
- ! Pushable changes
-
- [let | REMOTE [ GADGET remote>> ]
- REMOTE-BRANCH [ GADGET remote-branch>> ] |
-
- [let | ARG [ { REMOTE "/" REMOTE-BRANCH ".." } concat ] |
-
- [let | PROCESS [ REPO { "git" "log" ARG } git-process ] |
-
- PROCESS stdout>>
- [
- <shelf>
-
- "Pushable changes: " <label> add-gadget
-
- "View"
- [
- drop
- PROCESS popup-process-window
- ]
- <bevel-button> add-gadget
-
- "Push"
- [
- drop
-
- REPO { "git" "push" REMOTE REMOTE-BRANCH }
- git-process
- popup-process-window
-
- GADGET refresh-git-remote-gadget
-
- ]
- <bevel-button> add-gadget
-
- add-gadget
-
- ]
- when
-
- ] ] ]
-
- drop
-
- ] ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: start-fetch-thread ( GADGET -- )
-
- GADGET f >>closed drop
-
- [
-
- [
-
- GADGET closed>>
- [ f ]
- [
- [let | REPO [ GADGET repository>> ]
- REMOTE-BRANCH [ GADGET remote-branch>> ] |
-
- REPO { "git" "fetch" REMOTE-BRANCH } git-process drop ]
-
- GADGET fetch-period>> sleep
-
- t
- ]
- if
-
-
- ]
- loop
-
- ]
-
- in-thread ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: start-monitor-thread ( GADGET -- )
-
- GADGET f >>closed drop
-
- [
- [
- [let | MONITOR [ GADGET repository>> t <monitor> ] |
-
- [
- GADGET closed>>
- [ f ]
- [
-
- [let | PATH [ MONITOR next-change drop ] |
-
- ".git" PATH subseq?
- [ ]
- [
- micros
- GADGET last-refresh>> 0 or -
- 1000000 >
- [
- GADGET micros >>last-refresh drop
- GADGET refresh-git-remote-gadget
- ]
- when
- ]
- if ]
-
- t
-
- ]
- if
- ]
- loop
- ]
- ]
- with-monitors
- ]
- in-thread ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-M: <git-remote-gadget> pref-dim* ( gadget -- dim ) drop { 500 500 } ;
-
-M:: <git-remote-gadget> graft* ( GADGET -- )
- GADGET start-fetch-thread
- GADGET start-monitor-thread ;
-
-M:: <git-remote-gadget> ungraft* ( GADGET -- ) GADGET t >>closed drop ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: git-remote-tool ( REPO -- )
-
- <git-remote-gadget> new-gadget
-
- { 0 1 } >>orientation
- 1 >>fill
-
- REPO >>repository
-
- "origin" >>remote
-
- "master" >>remote-branch
-
- 5 minutes >>fetch-period
-
- dup refresh-git-remote-gadget
-
- "git-remote-tool" open-window ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-: factor-git-remote-tool ( -- ) "resource:" git-remote-tool ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-MAIN: factor-git-remote-tool
\ No newline at end of file
": forever ( quot -- ) '[ @ t ] loop ; inline"
""
"\"/tmp\" t <monitor>"
- "'[ _ next-change . . ] forever"
+ "'[ _ next-change . ] forever"
}
}
{ $slide "Example: time server"
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays html.parser.utils hashtables io kernel
namespaces make prettyprint quotations sequences splitting
-state-parser strings unicode.categories unicode.case ;
+html.parser.state strings unicode.categories unicode.case ;
IN: html.parser
TUPLE: tag name attributes text closing? ;
[ get-char CHAR: " = ] take-until ;
: read-quote ( -- string )
- get-char next* CHAR: ' =
- [ read-single-quote ] [ read-double-quote ] if next* ;
+ get-char next CHAR: ' =
+ [ read-single-quote ] [ read-double-quote ] if next ;
: read-key ( -- string )
read-whitespace*
: read-= ( -- )
read-whitespace*
- [ get-char CHAR: = = ] take-until drop next* ;
+ [ get-char CHAR: = = ] take-until drop next ;
: read-value ( -- string )
read-whitespace*
[ blank? ] trim ;
: read-comment ( -- )
- "-->" take-string* make-comment-tag push-tag ;
+ "-->" take-string make-comment-tag push-tag ;
: read-dtd ( -- )
- ">" take-string* make-dtd-tag push-tag ;
+ ">" take-string make-dtd-tag push-tag ;
: read-bang ( -- )
- next* get-char CHAR: - = get-next CHAR: - = and [
- next* next*
+ next get-char CHAR: - = get-next CHAR: - = and [
+ next next
read-comment
] [
read-dtd
: read-tag ( -- string )
[ get-char CHAR: > = get-char CHAR: < = or ] take-until
- get-char CHAR: < = [ next* ] unless ;
+ get-char CHAR: < = [ next ] unless ;
: read-< ( -- string )
- next* get-char CHAR: ! = [
+ next get-char CHAR: ! = [
read-bang f
] [
read-tag
--- /dev/null
+USING: tools.test html.parser.state ascii kernel ;
+IN: html.parser.state.tests
+
+: take-rest ( -- string )
+ [ f ] take-until ;
+
+: take-char ( -- string )
+ [ get-char = ] curry take-until ;
+
+[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
+[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test
+[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test
+! [ "foo " " bar" ] [ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: namespaces math kernel sequences accessors fry circular ;
+IN: html.parser.state
+
+TUPLE: state string i ;
+
+: get-i ( -- i ) state get i>> ;
+
+: get-char ( -- char )
+ state get [ i>> ] [ string>> ] bi ?nth ;
+
+: get-next ( -- char )
+ state get [ i>> 1+ ] [ string>> ] bi ?nth ;
+
+: next ( -- )
+ state get [ 1+ ] change-i drop ;
+
+: string-parse ( string quot -- )
+ [ 0 state boa state ] dip with-variable ;
+
+: short* ( n seq -- n' seq )
+ over [ nip dup length swap ] unless ;
+
+: skip-until ( quot: ( -- ? ) -- )
+ get-char [
+ [ call ] keep swap
+ [ drop ] [ next skip-until ] if
+ ] [ drop ] if ; inline recursive
+
+: take-until ( quot: ( -- ? ) -- )
+ [ get-i ] dip skip-until get-i
+ state get string>> subseq ;
+
+: string-matches? ( string circular -- ? )
+ get-char over push-circular sequence= ;
+
+: take-string ( match -- string )
+ dup length <circular-string>
+ [ 2dup string-matches? ] take-until nip
+ dup length rot length 1- - head next ;
USING: assocs combinators continuations hashtables
hashtables.private io kernel math
namespaces prettyprint quotations sequences splitting
-state-parser strings tools.test ;
+strings tools.test ;
USING: html.parser.utils ;
IN: html.parser.utils.tests
! See http://factorcode.org/license.txt for BSD license.
USING: assocs circular combinators continuations hashtables
hashtables.private io kernel math namespaces prettyprint
-quotations sequences splitting state-parser strings
+quotations sequences splitting html.parser.state strings
combinators.short-circuit ;
IN: html.parser.utils
: string-parse-end? ( -- ? ) get-next not ;
-: take-string* ( match -- string )
- dup length <circular-string>
- [ 2dup string-matches? ] take-until nip
- dup length rot length 1- - head next* ;
-
: trim1 ( seq ch -- newseq )
[ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ;
[ print read-lines ] [ 2drop flush ] if ;\r
\r
: tail-file-loop ( stream monitor -- )\r
- dup next-change 2drop over read-lines tail-file-loop ;\r
+ dup next-change drop over read-lines tail-file-loop ;\r
\r
: tail-file ( file -- )\r
dup utf8 <file-reader> dup read-lines\r
! Copyright (c) 2007, 2008 Aaron Schaefer.
! See http://factorcode.org/license.txt for BSD license.
-USING: combinators.short-circuit grouping kernel math math.parser namespaces
- sequences ;
+USING: combinators.short-circuit grouping kernel math math.parser
+math.text.utils namespaces sequences ;
IN: math.text.english
<PRIVATE
: negative-text ( n -- str )
0 < "Negative " "" ? ;
-: 3digit-groups ( n -- seq )
- [ dup 0 > ] [ 1000 /mod ] [ ] produce nip ;
-
: hundreds-place ( n -- str )
100 /mod over 0 = [
2drop ""
--- /dev/null
+Samuel Tardieu
--- /dev/null
+USING: help.markup help.syntax ;
+IN: math.text.french
+
+HELP: number>text
+{ $values { "n" "an integer" } { "str" "a string" } }
+{ $description "Returns a string describing " { $snippet "n" } " in French. Numbers with absolute value equal to or greater than 10^12 will be returned using their numeric representation." } ;
--- /dev/null
+USING: math math.functions math.parser math.text.french sequences tools.test ;
+
+[ "zéro" ] [ 0 number>text ] unit-test
+[ "vingt et un" ] [ 21 number>text ] unit-test
+[ "vingt-deux" ] [ 22 number>text ] unit-test
+[ "deux mille" ] [ 2000 number>text ] unit-test
+[ "soixante et un" ] [ 61 number>text ] unit-test
+[ "soixante-deux" ] [ 62 number>text ] unit-test
+[ "quatre-vingts" ] [ 80 number>text ] unit-test
+[ "quatre-vingt-un" ] [ 81 number>text ] unit-test
+[ "quatre-vingt-onze" ] [ 91 number>text ] unit-test
+[ "deux cents" ] [ 200 number>text ] unit-test
+[ "mille deux cents" ] [ 1200 number>text ] unit-test
+[ "mille deux cent quatre-vingts" ] [ 1280 number>text ] unit-test
+[ "mille deux cent quatre-vingt-un" ] [ 1281 number>text ] unit-test
+[ "un billion deux cent vingt milliards quatre-vingts millions trois cent quatre-vingt mille deux cents" ] [ 1220080380200 number>text ] unit-test
+[ "un million" ] [ 1000000 number>text ] unit-test
+[ "un million un" ] [ 1000001 number>text ] unit-test
+[ "moins vingt" ] [ -20 number>text ] unit-test
+[ 104 ] [ -1 10 102 ^ - number>text length ] unit-test
+! Check that we do not exhaust the call stack
+[ 1484 ] [ 10 100 ^ 1 - number>text length ] unit-test
--- /dev/null
+! Copyright (c) 2009 Samuel Tardieu.
+! See http://factorcode.org/license.txt for BSD license.
+USING: arrays assocs combinators kernel math math.functions
+math.parser math.text.utils memoize sequences ;
+IN: math.text.french
+
+<PRIVATE
+
+DEFER: basic ( n -- str )
+
+CONSTANT: literals
+ H{ { 0 "zéro" } { 1 "un" } { 2 "deux" } { 3 "trois" } { 4 "quatre" }
+ { 5 "cinq" } { 6 "six" } { 7 "sept" } { 8 "huit" } { 9 "neuf" }
+ { 10 "dix" } { 11 "onze" } { 12 "douze" } { 13 "treize" }
+ { 14 "quatorze" } { 15 "quinze" } { 16 "seize" } { 17 "dix-sept" }
+ { 18 "dix-huit" } { 19 "dix-neuf" } { 20 "vingt" } { 30 "trente" }
+ { 40 "quarante" } { 50 "cinquante" } { 60 "soixante" }
+ { 71 "soixante et onze" } { 80 "quatre-vingts" }
+ { 81 "quatre-vingt-un" }
+ { 100 "cent" } { 1000 "mille" } }
+
+MEMO: units ( -- seq ) ! up to 10^99
+ { "m" "b" "tr" "quadr" "quint" "sext" "sept" "oct"
+ "non" "déc" "unodéc" "duodéc" "trédéc" "quattuordéc"
+ "quindéc" "sexdéc" }
+ [ [ "illion" append ] [ "illiard" append ] bi 2array ] map concat
+ "mille" prefix ;
+
+! The only plurals we have to remove are "quatre-vingts" and "cents",
+! which are also the only strings ending with "ts".
+: unpluralize ( str -- newstr ) dup "ts" tail? [ but-last ] when ;
+: pluralize ( str -- newstr ) CHAR: s suffix ;
+
+: space-append ( str1 str2 -- str ) " " glue ;
+
+! Small numbers (below 100) use dashes between them unless they are
+! separated with "et". Pluralized prefixes must be unpluralized.
+: complete-small ( str n -- str )
+ { { 0 [ ] }
+ { 1 [ " et un" append ] }
+ [ [ unpluralize ] dip basic "-" glue ] } case ;
+
+: smaller-than-60 ( n -- str )
+ dup 10 mod [ - ] keep [ basic ] dip complete-small ;
+
+: base-onto ( n b -- str ) [ nip literals at ] [ - ] 2bi complete-small ;
+
+: smaller-than-80 ( n -- str ) 60 base-onto ;
+
+: smaller-than-100 ( n -- str ) 80 base-onto ;
+
+: if-zero ( n quot quot -- )
+ [ dup zero? ] 2dip [ [ drop ] prepose ] dip if ; inline
+
+: complete ( str n -- newstr )
+ [ ] [ basic space-append ] if-zero ;
+
+: smaller-than-1000 ( n -- str )
+ 100 /mod
+ [ "cent" swap dup 1 = [ drop ] [ basic swap space-append ] if ]
+ [ [ pluralize ] [ basic space-append ] if-zero ] bi* ;
+
+: smaller-than-2000 ( n -- str ) "mille" swap 1000 - complete ;
+
+: smaller-than-1000000 ( n -- str )
+ 1000 /mod [ basic unpluralize " mille" append ] dip complete ;
+
+: n-units ( n unit -- str/f )
+ {
+ { [ over zero? ] [ 2drop f ] }
+ { [ over 1 = ] [ [ basic ] dip space-append ] }
+ [ [ basic ] dip space-append pluralize ]
+ } cond ;
+
+: over-1000000 ( n -- str )
+ 3digit-groups [ 1+ units nth n-units ] map-index sift
+ reverse " " join ;
+
+: decompose ( n -- str ) 1000000 /mod [ over-1000000 ] dip complete ;
+
+: basic ( n -- str )
+ {
+ { [ dup literals key? ] [ literals at ] }
+ { [ dup 0 < ] [ abs basic "moins " swap append ] }
+ { [ dup 60 < ] [ smaller-than-60 ] }
+ { [ dup 80 < ] [ smaller-than-80 ] }
+ { [ dup 100 < ] [ smaller-than-100 ] }
+ { [ dup 1000 < ] [ smaller-than-1000 ] }
+ { [ dup 2000 < ] [ smaller-than-2000 ] }
+ { [ dup 1000000 < ] [ smaller-than-1000000 ] }
+ [ decompose ]
+ } cond ;
+
+PRIVATE>
+
+: number>text ( n -- str )
+ dup abs 10 102 ^ >= [ number>string ] [ basic ] if ;
--- /dev/null
+Convert integers to French text
--- /dev/null
+Aaron Schaefer
--- /dev/null
+Number to text conversion utilities
--- /dev/null
+USING: help.markup help.syntax ;
+IN: math.text.utils
+
+HELP: 3digit-groups
+{ $values { "n" "a positive integer" } { "seq" "a sequence" } }
+{ $description "Decomposes a number into groups of 3 digits and returns them in a sequence, starting with the units group, then the thousands, and so on." } ;
--- /dev/null
+USING: math.text.utils tools.test ;
+
+[ { 1 999 2 } ] [ 2999001 3digit-groups ] unit-test
--- /dev/null
+! Copyright (c) 2007, 2008 Aaron Schaefer.
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel math sequences ;
+IN: math.text.utils
+
+: 3digit-groups ( n -- seq )
+ [ dup 0 > ] [ 1000 /mod ] [ ] produce nip ;
{ $notes "It is important to note that even if the quotation discards items on the stack, the stack will be restored to the way it was before it is called (which is true of continuation usage in general)." } ;
ARTICLE: "partial-continuations" "Partial continuations"
-"Based on Scheme code for bshift and breset from"
+"Based on Scheme code for bshift and breset from "
{ $url "http://groups.google.com/group/comp.lang.scheme/msg/9f0d61da01540816" } "."
-"See this blog entry for more details:"
+" See this blog entry for more details:"
{ $url "http://www.bluishcoder.co.nz/2006/03/factor-partial-continuation-updates.html" }
{ $subsection breset }
{ $subsection bshift } ;
{ n-based-assoc <n-based-assoc> } related-words
-ARTICLE: "sequences.n-based" "sequences.n-based"
+ARTICLE: "sequences.n-based" "N-based sequences"
"The " { $vocab-link "sequences.n-based" } " vocabulary provides a sequence adaptor that allows a sequence to be treated as an assoc with non-zero-based keys."
{ $subsection n-based-assoc }
{ $subsection <n-based-assoc> }
+++ /dev/null
-
-USING: io io.encodings.ascii io.files io.files.temp io.launcher
- locals math.parser sequences sequences.deep
- help.syntax
- easy-help ;
-
-IN: size-of
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-Word: size-of
-
-Values:
-
- HEADERS sequence : List of header files
- TYPE string : A C type
- n integer : Size in number of bytes ..
-
-Description:
-
- Use 'size-of' to find out the size in bytes of a C type.
-
- The 'headers' argument is a list of header files to use. You may
- pass 'f' to only use 'stdio.h'. ..
-
-Example:
-
- ! Find the size of 'int'
-
- f "int" size-of . ..
-
-Example:
-
- ! Find the size of the 'XAnyEvent' struct from Xlib.h
-
- { "X11/Xlib.h" } "XAnyEvent" size-of . ..
-
-;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-:: size-of ( HEADERS TYPE -- n )
-
- [let | C-FILE [ "size-of.c" temp-file ]
- EXE-FILE [ "size-of" temp-file ]
- INCLUDES [ HEADERS [| FILE | { "#include <" FILE ">" } concat ] map ] |
-
- {
- "#include <stdio.h>"
- INCLUDES
- "main() { printf( \"%i\" , sizeof( " TYPE " ) ) ; }"
- }
-
- flatten C-FILE ascii set-file-lines
-
- { "gcc" C-FILE "-o" EXE-FILE } try-process
-
- EXE-FILE ascii <process-reader> contents string>number ] ;
-
-! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-FUEL, Factor's Ultimate Emacs Library -*- org -*-
+FUEL, Factor's Ultimate Emacs Library
-------------------------------------
FUEL provides a complete environment for your Factor coding pleasure
* Basic usage
*** Running the listener
- If you're using the default factor binary and images locations inside
- the Factor's source tree, that should be enough to start using FUEL.
- Editing any file with the extension .factor will put you in
- factor-mode; try C-hm for a summary of available commands.
+ If you're using the default factor binary and images locations inside
+ the Factor's source tree, that should be enough to start using FUEL.
+ Editing any file with the extension .factor will put you in
+ factor-mode; try C-hm for a summary of available commands.
- To start the listener, try M-x run-factor.
+ To start the listener, try M-x run-factor.
- By default, FUEL will try to use the binary and image files in the
- factor installation directory. You can customize them with:
+ By default, FUEL will try to use the binary and image files in the
+ factor installation directory. You can customize them with:
(setq fuel-listener-factor-binary <full path to factor>)
(setq fuel-listener-factor-image <full path to factor image>)
- Many aspects of the environment can be customized:
- M-x customize-group fuel will show you how many.
+ Many aspects of the environment can be customized:
+ M-x customize-group fuel will show you how many.
*** Faster listener startup
- On startup, run-factor loads the fuel vocabulary, which can take a
- while. If you want to speedup the load process, type 'save' in the
- listener prompt just after invoking run-factor. This will save a
- factor image (overwriting the current one) with all the needed
- vocabs.
+ On startup, run-factor loads the fuel vocabulary, which can take a
+ while. If you want to speedup the load process, type 'save' in the
+ listener prompt just after invoking run-factor. This will save a
+ factor image (overwriting the current one) with all the needed
+ vocabs.
+
+*** Connecting to a running Factor
+
+ 'run-factor' starts a new factor listener process managed by Emacs.
+ If you prefer to start Factor externally, you can also connect
+ remotely from Emacs. Here's how to proceed:
+
+ - In the factor listener, run FUEL: "fuel" run
+    This will start a server listener on port 9000.
+ - Switch to Emacs and issue the command 'M-x connect-to-factor'.
+
+ That's it; you should be up and running. See the help for
+ 'connect-to-factor' for how to use a different port.
*** Vocabulary creation
- FUEL offers a basic interface with Factor's scaffolding utilities.
+ FUEL offers a basic interface to Factor's scaffolding utilities.
To create a new vocabulary directory and associated files:
M-x fuel-scaffold-vocab
* Quick key reference
- (Triple chords ending in a single letter <x> accept also C-<x> (e.g.
- C-cC-eC-r is the same as C-cC-er)).
+ Triple chords ending in a single letter <x> accept also C-<x> (e.g.
+ C-cC-eC-r is the same as C-cC-er).
*** In factor source files:
- - C-cz : switch to listener
- - C-co : cycle between code, tests and docs factor files
- - C-cs : switch to other factor buffer (M-x fuel-switch-to-buffer)
- - C-x4s : switch to other factor buffer in other window
- - C-x5s : switch to other factor buffer in other frame
-
- - M-. : edit word at point in Emacs (see fuel-edit-word-method custom var)
- - M-, : go back to where M-. was last invoked
- - M-TAB : complete word at point
- - C-cC-eu : update USING: line
- - C-cC-ev : edit vocabulary (M-x fuel-edit-vocabulary)
- - C-cC-ew : edit word (M-x fuel-edit-word-at-point)
- - C-cC-ed : edit word's doc (M-x fuel-edit-word-at-point)
-
- - C-cr, C-cC-er : eval region
- - C-M-r, C-cC-ee : eval region, extending it to definition boundaries
- - C-M-x, C-cC-ex : eval definition around point
- - C-ck, C-cC-ek : run file
-
- - C-cC-da : toggle autodoc mode
- - C-cC-dd : help for word at point
- - C-cC-ds : short help word at point
- - C-cC-de : show stack effect of current sexp (with prefix, region)
- - C-cC-dp : find words containing given substring (M-x fuel-apropos)
- - C-cC-dv : show words in current file (with prefix, ask for vocab)
-
- - C-cM-<, C-cC-d< : show callers of word at point
- - C-cM->, C-cC-d> : show callees of word at point
-
- - C-cC-xs : extract innermost sexp (up to point) as a separate word
- - C-cC-xr : extract region as a separate word
- - C-cC-xi : replace word at point by its definition
- - C-cC-xv : extract region as a separate vocabulary
+ Commands in parenthesis can be invoked interactively with
+ M-x <command>, not necessarily in a factor buffer.
+
+ |-----------------+------------------------------------------------------------|
+ | C-cz | switch to listener (run-factor) |
+ | C-co | cycle between code, tests and docs files |
+ | C-cr | switch to listener and refresh all loaded vocabs |
+ | C-cs | switch to other factor buffer (fuel-switch-to-buffer) |
+ | C-x4s | switch to other factor buffer in other window |
+ | C-x5s | switch to other factor buffer in other frame |
+ |-----------------+------------------------------------------------------------|
+ | M-. | edit word at point in Emacs (fuel-edit-word) |
+ | M-, | go back to where M-. was last invoked |
+ | M-TAB | complete word at point |
+ | C-cC-eu | update USING: line (fuel-update-usings) |
+ | C-cC-ev | edit vocabulary (fuel-edit-vocabulary) |
+ | C-cC-ew | edit word (fuel-edit-word-at-point) |
+ | C-cC-ed | edit word's doc (C-u M-x fuel-edit-word-doc-at-point) |
+ |-----------------+------------------------------------------------------------|
+ | C-cC-er | eval region |
+ | C-M-r, C-cC-ee | eval region, extending it to definition boundaries |
+ | C-M-x, C-cC-ex | eval definition around point |
+ | C-ck, C-cC-ek | run file (fuel-run-file) |
+ |-----------------+------------------------------------------------------------|
+ | C-cC-da | toggle autodoc mode (fuel-autodoc-mode) |
+ | C-cC-dd | help for word at point (fuel-help) |
+ | C-cC-ds | short help word at point (fuel-help-short) |
+ | C-cC-de | show stack effect of current sexp (with prefix, region) |
+ | C-cC-dp | find words containing given substring (fuel-apropos) |
+ | C-cC-dv | show words in current file (with prefix, ask for vocab) |
+ |-----------------+------------------------------------------------------------|
+ | C-cM-<, C-cC-d< | show callers of word or vocabulary at point |
+ | | (fuel-show-callers, fuel-vocab-usage) |
+ | C-cM->, C-cC-d> | show callees of word or vocabulary at point |
+ | | (fuel-show-callees, fuel-vocab-uses) |
+ |-----------------+------------------------------------------------------------|
+ | C-cC-xs | extract innermost sexp (up to point) as a separate word |
+ | | (fuel-refactor-extract-sexp) |
+ | C-cC-xr | extract region as a separate word |
+ | | (fuel-refactor-extract-region) |
+ | C-cC-xv | extract region as a separate vocabulary |
+ | | (fuel-refactor-extract-vocab) |
+ | C-cC-xi | replace word by its definition (fuel-refactor-inline-word) |
+ | C-cC-xw | rename all uses of a word (fuel-refactor-rename-word) |
+ |-----------------+------------------------------------------------------------|
*** In the listener:
- - TAB : complete word at point
- - M-. : edit word at point in Emacs
- - C-ca : toggle autodoc mode
- - C-cp : find words containing given substring (M-x fuel-apropos)
- - C-cs : toggle stack mode
- - C-cv : edit vocabulary
- - C-ch : help for word at point
- - C-ck : run file
+ |------+----------------------------------------------------------|
+ | TAB | complete word at point |
+ | M-. | edit word at point in Emacs |
+ | C-cr | refresh all loaded vocabs |
+ | C-ca | toggle autodoc mode |
+ | C-cp | find words containing given substring (M-x fuel-apropos) |
+ | C-cs | toggle stack mode |
+ | C-cv | edit vocabulary |
+ | C-ch | help for word at point |
+ | C-ck | run file |
+ |------+----------------------------------------------------------|
*** In the debugger (it pops up upon eval/compilation errors):
- - g : go to error
- - <digit> : invoke nth restart
- - w/e/l : invoke :warnings, :errors, :linkage
- - q : bury buffer
+ |---------+-------------------------------------|
+ | g | go to error |
+ | <digit> | invoke nth restart |
+ | w/e/l | invoke :warnings, :errors, :linkage |
+ | q | bury buffer |
+ |---------+-------------------------------------|
*** In the help browser:
- - h : help for word at point
- - v : help for a vocabulary
- - a : find words containing given substring (M-x fuel-apropos)
- - e : edit current article
- - ba : bookmark current page
- - bb : display bookmarks
- - bd : delete bookmark at point
- - n/p : next/previous page
- - l : previous page
- - SPC/S-SPC : scroll up/down
- - TAB/S-TAB : next/previous link
- - k : kill current page and go to previous or next
- - r : refresh page
- - c : clean browsing history
- - M-. : edit word at point in Emacs
- - C-cz : switch to listener
- - q : bury buffer
+ |-----------+----------------------------------------------------------|
+ | h | help for word at point |
+ | v | help for a vocabulary |
+ | a | find words containing given substring (M-x fuel-apropos) |
+ | e | edit current article |
+ | ba | bookmark current page |
+ | bb | display bookmarks |
+ | bd | delete bookmark at point |
+ | n/p | next/previous page |
+ | l | previous page |
+ | SPC/S-SPC | scroll up/down |
+ | TAB/S-TAB | next/previous link |
+ | k | kill current page and go to previous or next |
+ | r | refresh page |
+ | c | clean browsing history |
+ | M-. | edit word at point in Emacs |
+ | C-cz | switch to listener |
+ | q | bury buffer |
+ |-----------+----------------------------------------------------------|
*** In crossref buffers
- - TAB/BACKTAB : navigate links
- - RET/mouse click : follow link
- - h : show help for word at point
- - q : bury buffer
+ |-----------------+-----------------------------|
+ | TAB/BACKTAB | navigate links |
+ | RET/mouse click | follow link |
+ | h | show help for word at point |
+ | q | bury buffer |
+ |-----------------+-----------------------------|
(autoload 'switch-to-factor "fuel-listener.el"
"Start a Factor listener, or switch to a running one." t)
+(autoload 'connect-to-factor "fuel-listener.el"
+ "Connect to an external Factor listener." t)
+
(autoload 'fuel-autodoc-mode "fuel-help.el"
"Minor mode showing in the minibuffer a synopsis of Factor word at point."
t)
;;; fuel-completion.el -- completion utilities
-;; Copyright (C) 2008 Jose Antonio Ortega Ruiz
+;; Copyright (C) 2008, 2009 Jose Antonio Ortega Ruiz
;; See http://factorcode.org/license.txt for BSD license.
;; Author: Jose Antonio Ortega Ruiz <jao@gnu.org>
(fuel-eval--send/wait '(:fuel* (fuel-get-vocabs) "fuel" (:array)))))))
fuel-completion--vocabs)
+(defun fuel-completion--read-vocab (&optional reload init-input history)
+ (let ((vocabs (fuel-completion--vocabs reload)))
+ (completing-read "Vocab name: " vocabs nil nil init-input history)))
+
(defsubst fuel-completion--vocab-list (prefix)
(fuel-eval--retort-result
(fuel-eval--send/wait `(:fuel* (,prefix fuel-get-vocabs/prefix) t t))))
;;; fuel-debug.el -- debugging factor code
-;; Copyright (C) 2008 Jose Antonio Ortega Ruiz
+;; Copyright (C) 2008, 2009 Jose Antonio Ortega Ruiz
;; See http://factorcode.org/license.txt for BSD license.
;; Author: Jose Antonio Ortega Ruiz <jao@gnu.org>
(goto-char (point-min))
(when (search-forward (car ci) nil t)
(setq str (format "%c %s, %s" (cdr ci) (car ci) str))))))
- (if (and (not err) fuel-debug--uses) "u to update USING:, " "")))
+ (if fuel-debug--uses "u to update USING:, " "")))
(defun fuel-debug--buffer-file ()
(with-current-buffer (fuel-debug--buffer)
(goto-char (point-min))
(if (re-search-forward "^USING: " nil t)
(let ((begin (point))
- (end (or (and (re-search-forward "\\_<;\\_>") (point)) (point))))
+ (end (or (and (re-search-forward ";\\( \\|$\\)") (point))
+ (point))))
(kill-region begin end))
(re-search-forward "^IN: " nil t)
(beginning-of-line)
\f
;;; Customization
-(defcustom fuel-edit-word-method nil
- "How the new buffer is opened when invoking
-\\[fuel-edit-word-at-point]."
- :group 'fuel
- :type '(choice (const :tag "Other window" window)
- (const :tag "Other frame" frame)
- (const :tag "Current window" nil)))
+(defmacro fuel-edit--define-custom-visit (var group doc)
+ `(defcustom ,var nil
+ ,doc
+ :group ',group
+ :type '(choice (const :tag "Other window" window)
+ (const :tag "Other frame" frame)
+ (const :tag "Current window" nil))))
+
+(fuel-edit--define-custom-visit
+ fuel-edit-word-method fuel
+ "How the new buffer is opened when invoking \\[fuel-edit-word-at-point]")
\f
;;; Auxiliar functions:
+(defun fuel-edit--visit-file (file method)
+ (cond ((eq method 'window) (find-file-other-window file))
+ ((eq method 'frame) (find-file-other-frame file))
+ (t (find-file file))))
+
(defun fuel-edit--looking-at-vocab ()
(save-excursion
(fuel-syntax--beginning-of-defun)
- (looking-at "USING:\\|USE:")))
+ (looking-at "USING:\\|USE:\\|IN:")))
(defun fuel-edit--try-edit (ret)
(let* ((err (fuel-eval--retort-error ret))
(error "Couldn't find edit location"))
(unless (file-readable-p (car loc))
(error "Couldn't open '%s' for read" (car loc)))
- (cond ((eq fuel-edit-word-method 'window) (find-file-other-window (car loc)))
- ((eq fuel-edit-word-method 'frame) (find-file-other-frame (car loc)))
- (t (find-file (car loc))))
+ (fuel-edit--visit-file (car loc) fuel-edit-word-method)
(goto-line (if (numberp (cadr loc)) (cadr loc) 1))))
(defun fuel-edit--read-vocabulary-name (refresh)
nil
fuel-edit--word-history
arg))
- (cmd `(:fuel* ((:quote ,word) fuel-get-edit-location))))
+ (cmd `(:fuel* ((:quote ,word) fuel-get-word-location))))
(fuel-edit--try-edit (fuel-eval--send/wait cmd))))
(defun fuel-edit-word-at-point (&optional arg)
(interactive "P")
(let* ((word (or (and (not arg) (fuel-syntax-symbol-at-point))
(fuel-completion--read-word "Edit word: ")))
- (cmd `(:fuel* ((:quote ,word) fuel-get-edit-location)))
+ (cmd `(:fuel* ((:quote ,word) fuel-get-word-location)))
(marker (and (not arg) (point-marker))))
(if (and (not arg) (fuel-edit--looking-at-vocab))
(fuel-edit-vocabulary nil word)
(case (car sexp)
(:array (factor--seq 'V{ '} (cdr sexp)))
(:seq (factor--seq '{ '} (cdr sexp)))
+ (:tuple (factor--seq 'T{ '} (cdr sexp)))
(:quote (format "\\ %s" (factor `(:factor ,(cadr sexp)))))
(:quotation (factor--seq '\[ '\] (cdr sexp)))
(:using (factor `(USING: ,@(cdr sexp) :end)))
(defun fuel-font-lock--syntactic-face (state)
(if (nth 3 state) 'factor-font-lock-string
(let ((c (char-after (nth 8 state))))
- (cond ((char-equal c ?\ )
+ (cond ((or (char-equal c ?\ ) (char-equal c ?\n))
(save-excursion
(goto-char (nth 8 state))
(beginning-of-line)
- (cond ((looking-at "USING: ") 'factor-font-lock-vocabulary-name)
+ (cond ((looking-at "USING: ")
+ 'factor-font-lock-vocabulary-name)
((looking-at "\\(TUPLE\\|SYMBOLS\\|VARS\\): ")
'factor-font-lock-symbol)
+ ((looking-at "C-ENUM:\\( \\|\n\\)")
+ 'factor-font-lock-constant)
(t 'default))))
- ((char-equal c ?U) 'factor-font-lock-parsing-word)
+ ((or (char-equal c ?U) (char-equal c ?C))
+ 'factor-font-lock-parsing-word)
((char-equal c ?\() 'factor-font-lock-stack-effect)
((char-equal c ?\") 'factor-font-lock-string)
(t 'factor-font-lock-comment)))))
(defconst fuel-font-lock--font-lock-keywords
`((,fuel-syntax--stack-effect-regex . 'factor-font-lock-stack-effect)
(,fuel-syntax--brace-words-regex 1 'factor-font-lock-parsing-word)
+ (,fuel-syntax--alien-function-regex (1 'factor-font-lock-type-name)
+ (2 'factor-font-lock-word))
(,fuel-syntax--vocab-ref-regexp 2 'factor-font-lock-vocabulary-name)
- (,fuel-syntax--constructor-regex (1 'factor-font-lock-word)
- (2 'factor-font-lock-type-name)
- (3 'factor-font-lock-invalid-syntax nil t))
+ (,fuel-syntax--constructor-decl-regex (1 'factor-font-lock-word)
+ (2 'factor-font-lock-type-name)
+ (3 'factor-font-lock-invalid-syntax nil t))
(,fuel-syntax--typedef-regex (1 'factor-font-lock-type-name)
(2 'factor-font-lock-type-name)
(3 'factor-font-lock-invalid-syntax nil t))
(,fuel-syntax--getter-regex . 'factor-font-lock-getter-word)
(,fuel-syntax--symbol-definition-regex 2 'factor-font-lock-symbol)
(,fuel-syntax--bad-string-regex . 'factor-font-lock-invalid-syntax)
- ("\\(P\\|SBUF\\)\"" 1 'factor-font-lock-parsing-word)
+ ("\\_<\\(P\\|SBUF\\|DLL\\)\"" 1 'factor-font-lock-parsing-word)
(,fuel-syntax--parsing-words-regex . 'factor-font-lock-parsing-word)))
(defun fuel-font-lock--font-lock-setup (&optional keywords no-syntax)
(fuel-listener--wait-for-prompt 10000)
(fuel-con--setup-connection (current-buffer))))
+(defun fuel-listener--connect-process (port)
+ (message "Connecting to remote listener ...")
+ (pop-to-buffer (fuel-listener--buffer))
+ (let ((process (get-buffer-process (current-buffer))))
+ (when (or (not process)
+ (y-or-n-p "Kill current listener? "))
+ (make-comint-in-buffer "fuel listener" (current-buffer)
+ (cons "localhost" port))
+ (fuel-listener--wait-for-prompt 10000)
+ (fuel-con--setup-connection (current-buffer)))))
+
(defun fuel-listener--process (&optional start)
(or (and (buffer-live-p (fuel-listener--buffer))
(get-buffer-process (fuel-listener--buffer)))
(goto-char (point-max))
(unless seen (error "No prompt found!"))))
-(defun fuel-listener-nuke ()
- (interactive)
- (goto-char (point-max))
- (comint-kill-region comint-last-input-start (point))
- (comint-redirect-cleanup)
- (fuel-con--setup-connection fuel-listener--buffer))
\f
-;;; Interface: starting fuel listener
+;;; Interface: starting and interacting with fuel listener:
(defalias 'switch-to-factor 'run-factor)
(defalias 'switch-to-fuel-listener 'run-factor)
(pop-to-buffer buf)
(switch-to-buffer buf))))
+(defun connect-to-factor (&optional arg)
+  "Connect to a remote listener running on the same host.
+Without prefix argument, the default port, 9000, is used.
+Otherwise, you'll be prompted for it. To make this work, in the
+remote listener you need to issue the words
+'fuel-start-remote-listener*' or 'port
+fuel-start-remote-listener', from the fuel vocabulary."
+ (interactive "P")
+ (let ((port (if (not arg) 9000 (read-number "Port: "))))
+ (fuel-listener--connect-process port)))
+
+(defun fuel-listener-nuke ()
+ "Try this command if the listener becomes unresponsive."
+ (interactive)
+ (goto-char (point-max))
+ (comint-kill-region comint-last-input-start (point))
+ (comint-redirect-cleanup)
+ (fuel-con--setup-connection fuel-listener--buffer))
+
+(defun fuel-refresh-all ()
+  "Switch to the listener buffer and invoke Factor's refresh-all.
+With prefix, you're teleported to the listener's buffer.
+ (interactive)
+ (let ((buf (process-buffer (fuel-listener--process))))
+ (pop-to-buffer buf)
+ (comint-send-string nil "\"Refreshing loaded vocabs...\" write nl flush")
+ (comint-send-string nil " refresh-all \"Done!\" write nl flush\n")))
+
\f
;;; Completion support
(define-key fuel-listener-mode-map "\C-a" 'fuel-listener--bol)
(define-key fuel-listener-mode-map "\C-ca" 'fuel-autodoc-mode)
(define-key fuel-listener-mode-map "\C-ch" 'fuel-help)
+(define-key fuel-listener-mode-map "\C-cr" 'fuel-refresh-all)
(define-key fuel-listener-mode-map "\C-cs" 'fuel-stack-mode)
(define-key fuel-listener-mode-map "\C-cp" 'fuel-apropos)
(define-key fuel-listener-mode-map "\M-." 'fuel-edit-word-at-point)
(defvar fuel-markup--maybe-nl nil))
(defun fuel-markup--print (e)
- (cond ((null e))
+ (cond ((null e) (insert "f"))
((stringp e) (fuel-markup--insert-string e))
((and (listp e) (symbolp (car e))
(assoc (car e) fuel-markup--printers))
(insert (cadr e))))
(defun fuel-markup--snippet (e)
- (let ((snip (format "%s" (cadr e))))
- (insert (fuel-font-lock--factor-str snip))))
+ (insert (mapconcat '(lambda (s)
+ (if (stringp s)
+ (fuel-font-lock--factor-str s)
+ (fuel-markup--print-str s)))
+ (cdr e)
+ " ")))
(defun fuel-markup--code (e)
(fuel-markup--insert-nl-if-nb)
(fuel-markup--snippet (cons '$snippet (cdr e))))
(defun fuel-markup--link (e)
- (let* ((link (nth 1 e))
+ (let* ((link (or (nth 1 e) 'f))
(type (or (nth 3 e) (if (symbolp link) 'word 'article)))
(label (or (nth 2 e)
(and (eq type 'article)
(fuel-mode--key-1 ?k 'fuel-run-file)
(fuel-mode--key-1 ?l 'fuel-run-file)
-(fuel-mode--key-1 ?r 'fuel-eval-region)
+(fuel-mode--key-1 ?r 'fuel-refresh-all)
(fuel-mode--key-1 ?z 'run-factor)
(fuel-mode--key-1 ?s 'fuel-switch-to-buffer)
(define-key fuel-mode-map "\C-x4s" 'fuel-switch-to-buffer-other-window)
(fuel-mode--key ?e ?w 'fuel-edit-word)
(fuel-mode--key ?e ?x 'fuel-eval-definition)
-(fuel-mode--key ?x ?s 'fuel-refactor-extract-sexp)
+(fuel-mode--key ?x ?i 'fuel-refactor-inline-word)
(fuel-mode--key ?x ?r 'fuel-refactor-extract-region)
+(fuel-mode--key ?x ?s 'fuel-refactor-extract-sexp)
(fuel-mode--key ?x ?v 'fuel-refactor-extract-vocab)
-(fuel-mode--key ?x ?i 'fuel-refactor-inline-word)
+(fuel-mode--key ?x ?w 'fuel-refactor-rename-word)
(fuel-mode--key ?d ?> 'fuel-show-callees)
(fuel-mode--key ?d ?< 'fuel-show-callers)
(require 'fuel-syntax)
(require 'fuel-base)
+(require 'etags)
+
\f
;;; Word definitions in buffer
(let* ((code (buffer-substring begin end))
(existing (fuel-refactor--reuse-existing code))
(code-str (or existing (fuel--region-to-string begin end)))
+ (word (or (car existing) (read-string "New word name: ")))
(stack-effect (or existing
(fuel-stack--infer-effect code-str)
- (read-string "Stack effect: ")))
- (word (or (car existing) (read-string "New word name: "))))
+ (read-string "Stack effect: "))))
(goto-char begin)
(delete-region begin end)
(insert word)
(save-excursion (font-lock-fontify-region start (point)))
(indent-region start (point))))))
+\f
+;;; Rename word:
+
+(defsubst fuel-refactor--rename-word (from to file)
+ (let ((files (fuel-xref--word-callers-files from)))
+ (tags-query-replace from to t `(cons ,file ',files))
+ files))
+
+(defun fuel-refactor--def-word ()
+ (save-excursion
+ (fuel-syntax--beginning-of-defun)
+ (or (and (looking-at fuel-syntax--method-definition-regex)
+ (match-string-no-properties 2))
+ (and (looking-at fuel-syntax--word-definition-regex)
+ (match-string-no-properties 2)))))
+
+(defun fuel-refactor-rename-word (&optional arg)
+  "Globally rename the word whose definition is at point.
+With prefix argument, use word at point instead."
+ (interactive "P")
+ (let* ((from (if arg (fuel-syntax-symbol-at-point) (fuel-refactor--def-word)))
+ (from (read-string "Rename word: " from))
+ (to (read-string (format "Rename '%s' to: " from)))
+ (buffer (current-buffer)))
+ (fuel-refactor--rename-word from to (buffer-file-name))))
+
\f
;;; Extract vocab:
;;; Regexps galore:
(defconst fuel-syntax--parsing-words
- '(":" "::" ";" "<<" "<PRIVATE" ">>"
- "ABOUT:" "ALIAS:" "ARTICLE:"
+ '(":" "::" ";" "&:" "<<" "<PRIVATE" ">>"
+ "ABOUT:" "ALIAS:" "ALIEN:" "ARTICLE:"
"B" "BIN:"
- "C:" "C-STRUCT:" "C-UNION:" "CHAR:" "CONSTANT:" "call-next-method"
+ "C:" "C-ENUM:" "C-STRUCT:" "C-UNION:" "CHAR:" "CONSTANT:" "call-next-method"
"DEFER:"
"ERROR:" "EXCLUDE:"
- "f" "FORGET:" "FROM:"
+ "f" "FORGET:" "FROM:" "FUNCTION:"
"GENERIC#" "GENERIC:"
"HELP:" "HEX:" "HOOK:"
"IN:" "initial:" "INSTANCE:" "INTERSECTION:"
+ "LIBRARY:"
"M:" "MACRO:" "MACRO::" "MAIN:" "MATH:" "MEMO:" "MEMO:" "METHOD:" "MIXIN:"
"OCT:"
"POSTPONE:" "PREDICATE:" "PRIMITIVE:" "PRIVATE>" "PROVIDE:"
'("IN:" "USE:" "FROM:" "EXCLUDE:" "QUALIFIED:" "QUALIFIED-WITH:")))
(defconst fuel-syntax--int-constant-def-regex
- (fuel-syntax--second-word-regex '("CHAR:" "BIN:" "HEX:" "OCT:")))
+ (fuel-syntax--second-word-regex '("ALIEN:" "CHAR:" "BIN:" "HEX:" "OCT:")))
(defconst fuel-syntax--type-definition-regex
- (fuel-syntax--second-word-regex '("MIXIN:" "TUPLE:" "SINGLETON:" "UNION:")))
+ (fuel-syntax--second-word-regex
+ '("C-STRUCT:" "C-UNION:" "MIXIN:" "TUPLE:" "SINGLETON:" "UNION:")))
(defconst fuel-syntax--tuple-decl-regex
"^TUPLE: +\\([^ \n]+\\) +< +\\([^ \n]+\\)\\_>")
(defconst fuel-syntax--setter-regex "\\_<>>.+?\\_>")
(defconst fuel-syntax--symbol-definition-regex
- (fuel-syntax--second-word-regex '("SYMBOL:" "VAR:")))
+ (fuel-syntax--second-word-regex '("&:" "SYMBOL:" "VAR:")))
(defconst fuel-syntax--stack-effect-regex
"\\( ( .* )\\)\\|\\( (( .* ))\\)")
(defconst fuel-syntax--sub-vocab-regex "^<\\([^ \n]+\\) *$")
+(defconst fuel-syntax--alien-function-regex
+ "\\_<FUNCTION: \\(\\w+\\) \\(\\w+\\)")
+
(defconst fuel-syntax--indent-def-starts '("" ":"
- "FROM"
+ "C-ENUM" "C-STRUCT" "C-UNION"
+ "FROM" "FUNCTION:"
"INTERSECTION:"
"M" "MACRO" "MACRO:"
"MEMO" "MEMO:" "METHOD"
"VARS"))
(defconst fuel-syntax--indent-def-start-regex
- (format "^\\(%s:\\) " (regexp-opt fuel-syntax--indent-def-starts)))
+ (format "^\\(%s:\\)\\( \\|\n\\)" (regexp-opt fuel-syntax--indent-def-starts)))
(defconst fuel-syntax--no-indent-def-start-regex
(format "^\\(%s:\\) " (regexp-opt fuel-syntax--no-indent-def-starts)))
"GENERIC:" "GENERIC#"
"HELP:" "HEX:" "HOOK:"
"IN:" "INSTANCE:"
+ "LIBRARY:"
"MAIN:" "MATH:" "MIXIN:"
"OCT:"
"POSTPONE:" "PRIVATE>" "<PRIVATE"
(format ":[^ ]* [^ ]+\\(%s\\)*" fuel-syntax--stack-effect-regex)
"M[^:]*: [^ ]+ [^ ]+"))
-(defconst fuel-syntax--constructor-regex
+(defconst fuel-syntax--constructor-decl-regex
"\\_<C: +\\(\\w+\\) +\\(\\w+\\)\\( .*\\)?$")
(defconst fuel-syntax--typedef-regex
;; Comments:
("\\_<\\(#?!\\) .*\\(\n\\|$\\)" (1 "<") (2 ">"))
("\\_<\\(#?!\\)\\(\n\\|$\\)" (1 "<") (2 ">"))
- ("\\_<\\((\\) \\([^)\n]*?\\) \\()\\)\\_>" (1 "<b") (2 "w") (3 ">b"))
+ (" \\((\\)( \\([^\n]*\\) )\\()\\)\\( \\|\n\\)" (1 "<b") (2 "w") (3 ">b"))
+ (" \\((\\) \\([^\n]*\\) \\()\\)\\( \\|\n\\)" (1 "<b") (2 "w") (3 ">b"))
;; Strings
- ("\\_<\\(\"\\)\\([^\n\r\f\"]\\|\\\\\"\\)*\\(\"\\)\\_>" (1 "\"") (3 "\""))
+ ("\\( \\|^\\)\\(DLL\\|P\\|SBUF\\)\\(\"\\)[^\n\r\f]*?\\(\"\\)\\( \\|\n\\)"
+ (3 "\"") (4 "\""))
+ ("\\(\"\\)[^\n\r\f]*?\\(\"\\)\\( \\|\n\\|$\\)" (1 "\"") (2 "\""))
("\\_<<\\(\"\\)\\_>" (1 "<b"))
("\\_<\\(\"\\)>\\_>" (1 ">b"))
;; Multiline constructs
("\\_<\\(U\\)SING: \\(;\\)" (1 "<b") (2 ">b"))
("\\_<USING:\\( \\)" (1 "<b"))
- ("\\_<TUPLE: +\\w+? +< +\\w+? *\\( \\)" (1 "<b"))
- ("\\_<\\(TUPLE\\|SYMBOLS\\|VARS\\): +\\w+? *\\( \\)\\([^<\n]\\|\\_>\\)" (2 "<b"))
+ ("\\_<\\(C\\)-ENUM: \\(;\\)" (1 "<b") (2 ">b"))
+ ("\\_<C-ENUM:\\( \\|\n\\)" (1 "<b"))
+ ("\\_<TUPLE: +\\w+? +< +\\w+? *\\( \\|\n\\)\\([^;]\\|$\\)" (1 "<b"))
+ ("\\_<\\(TUPLE\\|SYMBOLS\\|VARS\\): +\\w+? *\\( \\|\n\\)\\([^;<\n]\\|\\_>\\)"
+ (2 "<b"))
("\\(\n\\| \\);\\_>" (1 ">b"))
;; Let and lambda:
("\\_<\\(!(\\) .* \\()\\)" (1 "<") (2 ">"))
:group 'fuel-xref
:type 'boolean)
+(fuel-edit--define-custom-visit
+ fuel-xref-follow-link-method
+ fuel-xref
+ "How new buffers are opened when following a crossref link.")
+
(fuel-font-lock--defface fuel-font-lock-xref-link
'link fuel-xref "highlighting links in cross-reference buffers")
(when (not (file-readable-p file))
(error "File '%s' is not readable" file))
(let ((word fuel-xref--word))
- (find-file-other-window file)
+ (fuel-edit--visit-file file fuel-xref-follow-link-method)
(when (numberp line) (goto-line line))
(when (and word fuel-xref-follow-link-to-word-p)
- (and (search-forward word
- (fuel-syntax--end-of-defun-pos)
- t)
+ (and (re-search-forward (format "\\_<%s\\_>" word)
+ (fuel-syntax--end-of-defun-pos)
+ t)
(goto-char (match-beginning 0)))))))
\f
(defvar fuel-xref--help-string
"(Press RET or click to follow crossrefs, or h for help on word at point)")
-(defun fuel-xref--title (word cc count)
+(defun fuel-xref--title (word cc count thing)
(put-text-property 0 (length word) 'font-lock-face 'bold word)
- (cond ((zerop count) (format "No known words %s %s" cc word))
- ((= 1 count) (format "1 word %s %s:" cc word))
- (t (format "%s words %s %s:" count cc word))))
+ (cond ((zerop count) (format "No known %s %s %s" thing cc word))
+ ((= 1 count) (format "1 %s %s %s:" thing cc word))
+ (t (format "%s %ss %s %s:" count thing cc word))))
(defun fuel-xref--insert-ref (ref &optional no-vocab)
(when (and (stringp (first ref))
(newline)
t))
-(defun fuel-xref--fill-buffer (word cc refs &optional no-vocab app)
+(defun fuel-xref--fill-buffer (word cc refs &optional no-vocab app thing)
(let ((inhibit-read-only t)
(count 0))
(with-current-buffer (fuel-xref--buffer)
(newline)
(goto-char start)
(save-excursion
- (insert (fuel-xref--title word cc count) "\n\n"))
+ (insert (fuel-xref--title word cc count (or thing "word")) "\n\n"))
count))))
-(defun fuel-xref--fill-and-display (word cc refs &optional no-vocab)
- (let ((count (fuel-xref--fill-buffer word cc refs no-vocab)))
+(defun fuel-xref--fill-and-display (word cc refs &optional no-vocab thing)
+ (let ((count (fuel-xref--fill-buffer word cc refs no-vocab nil (or thing "word"))))
(if (zerop count)
- (error (fuel-xref--title word cc 0))
+ (error (fuel-xref--title word cc 0 (or thing "word")))
(message "")
(fuel-popup--display (fuel-xref--buffer)))))
+(defun fuel-xref--callers (word)
+ (let ((cmd `(:fuel* (((:quote ,word) fuel-callers-xref)))))
+ (fuel-eval--retort-result (fuel-eval--send/wait cmd))))
+
(defun fuel-xref--show-callers (word)
- (let* ((cmd `(:fuel* (((:quote ,word) fuel-callers-xref))))
- (res (fuel-eval--retort-result (fuel-eval--send/wait cmd))))
- (fuel-xref--fill-and-display word "using" res)))
+ (let ((refs (fuel-xref--callers word)))
+ (with-current-buffer (fuel-xref--buffer) (setq fuel-xref--word word))
+ (fuel-xref--fill-and-display word "using" refs)))
+
+(defun fuel-xref--word-callers-files (word)
+ (mapcar 'third (fuel-xref--callers word)))
(defun fuel-xref--show-callees (word)
(let* ((cmd `(:fuel* (((:quote ,word) fuel-callees-xref))))
(res (fuel-eval--retort-result (fuel-eval--send/wait cmd))))
+ (with-current-buffer (fuel-xref--buffer) (setq fuel-xref--word nil))
(fuel-xref--fill-and-display word "used by" res)))
(defun fuel-xref--apropos (str)
(let* ((cmd `(:fuel* ((,str fuel-apropos-xref))))
(res (fuel-eval--retort-result (fuel-eval--send/wait cmd))))
+ (with-current-buffer (fuel-xref--buffer) (setq fuel-xref--word nil))
(fuel-xref--fill-and-display str "containing" res)))
(defun fuel-xref--show-vocab (vocab &optional app)
(let* ((cmd `(:fuel* ((,vocab fuel-vocab-xref)) ,vocab))
(res (fuel-eval--retort-result (fuel-eval--send/wait cmd))))
+ (with-current-buffer (fuel-xref--buffer) (setq fuel-xref--word nil))
(fuel-xref--fill-buffer vocab "in vocabulary" res t app)))
(defun fuel-xref--show-vocab-words (vocab &optional private)
(fuel-popup--display (fuel-xref--buffer))
(goto-char (point-min)))
+(defun fuel-xref--show-vocab-usage (vocab)
+ (let* ((cmd `(:fuel* ((,vocab fuel-vocab-usage-xref))))
+ (res (fuel-eval--retort-result (fuel-eval--send/wait cmd))))
+ (with-current-buffer (fuel-xref--buffer) (setq fuel-xref--word nil))
+ (fuel-xref--fill-and-display vocab "using" res t "vocab")))
+
+(defun fuel-xref--show-vocab-uses (vocab)
+ (let* ((cmd `(:fuel* ((,vocab fuel-vocab-uses-xref))))
+ (res (fuel-eval--retort-result (fuel-eval--send/wait cmd))))
+ (with-current-buffer (fuel-xref--buffer) (setq fuel-xref--word nil))
+ (fuel-xref--fill-and-display vocab "used by" res t "vocab")))
+
\f
;;; User commands:
(defvar fuel-xref--word-history nil)
(defun fuel-show-callers (&optional arg)
- "Show a list of callers of word at point.
+ "Show a list of callers of word or vocabulary at point.
With prefix argument, ask for word."
(interactive "P")
(let ((word (if arg (fuel-completion--read-word "Find callers for: "
fuel-xref--word-history)
(fuel-syntax-symbol-at-point))))
(when word
- (message "Looking up %s's callers ..." word)
- (fuel-xref--show-callers word))))
+ (message "Looking up %s's users ..." word)
+ (if (and (not arg)
+ (fuel-edit--looking-at-vocab))
+ (fuel-xref--show-vocab-usage word)
+ (fuel-xref--show-callers word)))))
(defun fuel-show-callees (&optional arg)
- "Show a list of callers of word at point.
+ "Show a list of callers of word or vocabulary at point.
With prefix argument, ask for word."
(interactive "P")
(let ((word (if arg (fuel-completion--read-word "Find callees for: "
(fuel-syntax-symbol-at-point))))
(when word
(message "Looking up %s's callees ..." word)
- (fuel-xref--show-callees word))))
+ (if (and (not arg)
+ (fuel-edit--looking-at-vocab))
+ (fuel-xref--show-vocab-uses word)
+ (fuel-xref--show-callees word)))))
+
+(defvar fuel-xref--vocab-history nil)
+
+(defun fuel-vocab-uses (&optional arg)
+ "Show a list of vocabularies used by a given one.
+With prefix argument, force reload of vocabulary list."
+ (interactive "P")
+ (let ((vocab (fuel-completion--read-vocab arg
+ (fuel-syntax-symbol-at-point)
+ fuel-xref--vocab-history)))
+ (fuel-xref--show-vocab-uses vocab)))
+
+(defun fuel-vocab-usage (&optional arg)
+ "Show a list of vocabularies that use a given one.
+With prefix argument, force reload of vocabulary list."
+ (interactive "P")
+ (let ((vocab (fuel-completion--read-vocab arg
+ (fuel-syntax-symbol-at-point)
+ fuel-xref--vocab-history)))
+ (fuel-xref--show-vocab-usage vocab)))
(defun fuel-apropos (str)
"Show a list of words containing the given substring."
--- /dev/null
+
+USING: io io.encodings.ascii io.files io.files.temp io.launcher
+ locals math.parser sequences sequences.deep
+ help.syntax
+ easy-help ;
+
+IN: size-of
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+Word: size-of
+
+Values:
+
+ HEADERS sequence : List of header files
+ TYPE string : A C type
+ n integer : Size in number of bytes ..
+
+Description:
+
+ Use 'size-of' to find out the size in bytes of a C type.
+
+ The 'headers' argument is a list of header files to use. You may
+ pass 'f' to only use 'stdio.h'. ..
+
+Example:
+
+ ! Find the size of 'int'
+
+ f "int" size-of . ..
+
+Example:
+
+ ! Find the size of the 'XAnyEvent' struct from Xlib.h
+
+ { "X11/Xlib.h" } "XAnyEvent" size-of . ..
+
+;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+:: size-of ( HEADERS TYPE -- n )
+
+ [let | C-FILE [ "size-of.c" temp-file ]
+ EXE-FILE [ "size-of" temp-file ]
+ INCLUDES [ HEADERS [| FILE | { "#include <" FILE ">" } concat ] map ] |
+
+ {
+ "#include <stdio.h>"
+ INCLUDES
+ "main() { printf( \"%i\" , sizeof( " TYPE " ) ) ; }"
+ }
+
+ flatten C-FILE ascii set-file-lines
+
+ { "gcc" C-FILE "-o" EXE-FILE } try-process
+
+ EXE-FILE ascii <process-reader> contents string>number ] ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+++ /dev/null
-! Copyright (C) 2008 Daniel Ehrenberg.
-! See http://factorcode.org/license.txt for BSD license.
-USING: lexer parser splitting kernel quotations namespaces make
-sequences assocs sequences.lib xml.generator xml.utilities
-xml.data ;
-IN: xml.syntax
-
-: parsed-name ( accum -- accum )
- scan ":" split1 [ f <name> ] [ <simple-name> ] if* parsed ;
-
-: run-combinator ( accum quot1 quot2 -- accum )
- >r [ ] like parsed r> [ parsed ] each ;
-
-: parse-tag-contents ( accum contained? -- accum )
- [ \ contained*, parsed ] [
- scan-word \ [ =
- [ POSTPONE: [ \ tag*, parsed ]
- [ "Expected [ missing" throw ] if
- ] if ;
-
-DEFER: >>
-
-: attributes-parsed ( accum quot -- accum )
- [ f parsed ] [
- >r \ >r parsed r> parsed
- [ H{ } make-assoc r> swap ] [ parsed ] each
- ] if-empty ;
-
-: <<
- parsed-name [
- \ >> parse-until >quotation
- attributes-parsed \ contained? get
- ] with-scope parse-tag-contents ; parsing
-
-: ==
- \ call parsed parsed-name \ set parsed ; parsing
-
-: //
- \ contained? on ; parsing
-
-: parse-special ( accum end-token word -- accum )
- >r parse-tokens " " join parsed r> parsed ;
-
-: <!-- "-->" \ comment, parse-special ; parsing
-
-: <! ">" \ directive, parse-special ; parsing
-
-: <? "?>" \ instruction, parse-special ; parsing
-
-: >xml-document ( seq -- xml )
- dup first prolog? [ unclip-slice ] [ standard-prolog ] if swap
- [ tag? ] split-around <xml> ;
-
-DEFER: XML>
-
-: <XML
- \ XML> [ >quotation ] parse-literal
- { } parsed \ make parsed \ >xml-document parsed ; parsing
critical_error("Bug in set_callstack()",0);
}
-F_COMPILED *frame_code(F_STACK_FRAME *frame)
+F_CODE_BLOCK *frame_code(F_STACK_FRAME *frame)
{
- return (F_COMPILED *)frame->xt - 1;
+ return (F_CODE_BLOCK *)frame->xt - 1;
}
CELL frame_type(F_STACK_FRAME *frame)
CELL frame_executing(F_STACK_FRAME *frame)
{
- F_COMPILED *compiled = frame_code(frame);
- CELL code_start = (CELL)(compiled + 1);
- CELL literal_start = code_start + compiled->code_length;
-
- return get(literal_start);
+ F_CODE_BLOCK *compiled = frame_code(frame);
+ if(compiled->literals == F)
+ return F;
+ else
+ {
+ F_ARRAY *array = untag_object(compiled->literals);
+ return array_nth(array,0);
+ }
}
F_STACK_FRAME *frame_successor(F_STACK_FRAME *frame)
void iterate_callstack(CELL top, CELL bottom, CALLSTACK_ITER iterator);
void iterate_callstack_object(F_CALLSTACK *stack, CALLSTACK_ITER iterator);
F_STACK_FRAME *frame_successor(F_STACK_FRAME *frame);
-F_COMPILED *frame_code(F_STACK_FRAME *frame);
+F_CODE_BLOCK *frame_code(F_STACK_FRAME *frame);
CELL frame_executing(F_STACK_FRAME *frame);
CELL frame_scan(F_STACK_FRAME *frame);
CELL frame_type(F_STACK_FRAME *frame);
--- /dev/null
+#include "master.h"
+
+void flush_icache_for(F_CODE_BLOCK *compiled)
+{
+ CELL start = (CELL)(compiled + 1);
+ flush_icache(start,compiled->code_length);
+}
+
+void iterate_relocations(F_CODE_BLOCK *compiled, RELOCATION_ITERATOR iter)
+{
+ if(compiled->relocation != F)
+ {
+ F_BYTE_ARRAY *relocation = untag_object(compiled->relocation);
+
+ F_REL *rel = (F_REL *)(relocation + 1);
+ F_REL *rel_end = (F_REL *)((char *)rel + byte_array_capacity(relocation));
+
+ while(rel < rel_end)
+ {
+ iter(rel,compiled);
+ rel++;
+ }
+ }
+}
+
+/* Store a 32-bit value into a PowerPC LIS/ORI sequence */
+INLINE void store_address_2_2(CELL cell, CELL value)
+{
+ put(cell - CELLS,((get(cell - CELLS) & ~0xffff) | ((value >> 16) & 0xffff)));
+ put(cell,((get(cell) & ~0xffff) | (value & 0xffff)));
+}
+
+/* Store a value into a bitfield of a PowerPC instruction */
+INLINE void store_address_masked(CELL cell, F_FIXNUM value, CELL mask, F_FIXNUM shift)
+{
+	/* This is inaccurate but good enough */
+ F_FIXNUM test = (F_FIXNUM)mask >> 1;
+ if(value <= -test || value >= test)
+ critical_error("Value does not fit inside relocation",0);
+
+ u32 original = *(u32*)cell;
+ original &= ~mask;
+ *(u32*)cell = (original | ((value >> shift) & mask));
+}
+
+/* Perform a fixup on a code block */
+void store_address_in_code_block(CELL class, CELL offset, F_FIXNUM absolute_value)
+{
+ F_FIXNUM relative_value = absolute_value - offset;
+
+ switch(class)
+ {
+ case RC_ABSOLUTE_CELL:
+ put(offset,absolute_value);
+ break;
+ case RC_ABSOLUTE:
+ *(u32*)offset = absolute_value;
+ break;
+ case RC_RELATIVE:
+ *(u32*)offset = relative_value - sizeof(u32);
+ break;
+ case RC_ABSOLUTE_PPC_2_2:
+ store_address_2_2(offset,absolute_value);
+ break;
+ case RC_RELATIVE_PPC_2:
+ store_address_masked(offset,relative_value,REL_RELATIVE_PPC_2_MASK,0);
+ break;
+ case RC_RELATIVE_PPC_3:
+ store_address_masked(offset,relative_value,REL_RELATIVE_PPC_3_MASK,0);
+ break;
+ case RC_RELATIVE_ARM_3:
+ store_address_masked(offset,relative_value - CELLS * 2,
+ REL_RELATIVE_ARM_3_MASK,2);
+ break;
+ case RC_INDIRECT_ARM:
+ store_address_masked(offset,relative_value - CELLS,
+ REL_INDIRECT_ARM_MASK,0);
+ break;
+ case RC_INDIRECT_ARM_PC:
+ store_address_masked(offset,relative_value - CELLS * 2,
+ REL_INDIRECT_ARM_MASK,0);
+ break;
+ default:
+ critical_error("Bad rel class",class);
+ break;
+ }
+}
+
+void update_literal_references_step(F_REL *rel, F_CODE_BLOCK *compiled)
+{
+ if(REL_TYPE(rel) == RT_IMMEDIATE)
+ {
+ CELL offset = rel->offset + (CELL)(compiled + 1);
+ F_ARRAY *literals = untag_object(compiled->literals);
+ F_FIXNUM absolute_value = array_nth(literals,REL_ARGUMENT(rel));
+ store_address_in_code_block(REL_CLASS(rel),offset,absolute_value);
+ }
+}
+
+/* Update pointers to literals from compiled code. */
+void update_literal_references(F_CODE_BLOCK *compiled)
+{
+ iterate_relocations(compiled,update_literal_references_step);
+ flush_icache_for(compiled);
+}
+
+/* Copy all literals referenced from a code block to newspace. Only for
+aging and nursery collections */
+void copy_literal_references(F_CODE_BLOCK *compiled)
+{
+ if(collecting_gen >= compiled->last_scan)
+ {
+ if(collecting_accumulation_gen_p())
+ compiled->last_scan = collecting_gen;
+ else
+ compiled->last_scan = collecting_gen + 1;
+
+ /* initialize chase pointer */
+ CELL scan = newspace->here;
+
+ copy_handle(&compiled->literals);
+ copy_handle(&compiled->relocation);
+
+ /* do some tracing so that all reachable literals are now
+ at their final address */
+ copy_reachable_objects(scan,&newspace->here);
+
+ update_literal_references(compiled);
+ }
+}
+
+CELL object_xt(CELL obj)
+{
+ if(type_of(obj) == WORD_TYPE)
+ return (CELL)untag_word(obj)->xt;
+ else
+ return (CELL)untag_quotation(obj)->xt;
+}
+
+void update_word_references_step(F_REL *rel, F_CODE_BLOCK *compiled)
+{
+ if(REL_TYPE(rel) == RT_XT)
+ {
+ CELL offset = rel->offset + (CELL)(compiled + 1);
+ F_ARRAY *literals = untag_object(compiled->literals);
+ CELL xt = object_xt(array_nth(literals,REL_ARGUMENT(rel)));
+ store_address_in_code_block(REL_CLASS(rel),offset,xt);
+ }
+}
+
+/* Relocate new code blocks completely; updating references to literals,
+dlsyms, and words. For all other words in the code heap, we only need
+to update references to other words, without worrying about literals
+or dlsyms. */
+void update_word_references(F_CODE_BLOCK *compiled)
+{
+ if(compiled->needs_fixup)
+ relocate_code_block(compiled);
+ else
+ {
+ iterate_relocations(compiled,update_word_references_step);
+ flush_icache_for(compiled);
+ }
+}
+
+/* Update references to words. This is done after a new code block
+is added to the heap. */
+
+/* Mark all literals referenced from a word XT. Only for tenured
+collections */
+void mark_code_block(F_CODE_BLOCK *compiled)
+{
+ mark_block(compiled_to_block(compiled));
+
+ copy_handle(&compiled->literals);
+ copy_handle(&compiled->relocation);
+
+ flush_icache_for(compiled);
+}
+
+void mark_stack_frame_step(F_STACK_FRAME *frame)
+{
+ mark_code_block(frame_code(frame));
+}
+
+/* Mark code blocks executing in currently active stack frames. */
+void mark_active_blocks(F_CONTEXT *stacks)
+{
+ if(collecting_gen == TENURED)
+ {
+ CELL top = (CELL)stacks->callstack_top;
+ CELL bottom = (CELL)stacks->callstack_bottom;
+
+ iterate_callstack(top,bottom,mark_stack_frame_step);
+ }
+}
+
+void mark_object_code_block(CELL scan)
+{
+ F_WORD *word;
+ F_QUOTATION *quot;
+ F_CALLSTACK *stack;
+
+ switch(object_type(scan))
+ {
+ case WORD_TYPE:
+ word = (F_WORD *)scan;
+ mark_code_block(word->code);
+ if(word->profiling)
+ mark_code_block(word->profiling);
+ break;
+ case QUOTATION_TYPE:
+ quot = (F_QUOTATION *)scan;
+ if(quot->compiledp != F)
+ mark_code_block(quot->code);
+ break;
+ case CALLSTACK_TYPE:
+ stack = (F_CALLSTACK *)scan;
+ iterate_callstack_object(stack,mark_stack_frame_step);
+ break;
+ }
+}
+
+/* References to undefined symbols are patched up to call this function on
+image load */
+void undefined_symbol(void)
+{
+ general_error(ERROR_UNDEFINED_SYMBOL,F,F,NULL);
+}
+
+/* Look up an external library symbol referenced by a compiled code block */
+void *get_rel_symbol(F_REL *rel, F_ARRAY *literals)
+{
+ CELL arg = REL_ARGUMENT(rel);
+ CELL symbol = array_nth(literals,arg);
+ CELL library = array_nth(literals,arg + 1);
+
+ F_DLL *dll = (library == F ? NULL : untag_dll(library));
+
+ if(dll != NULL && !dll->dll)
+ return undefined_symbol;
+
+ if(type_of(symbol) == BYTE_ARRAY_TYPE)
+ {
+ F_SYMBOL *name = alien_offset(symbol);
+ void *sym = ffi_dlsym(dll,name);
+
+ if(sym)
+ return sym;
+ }
+ else if(type_of(symbol) == ARRAY_TYPE)
+ {
+ CELL i;
+ F_ARRAY *names = untag_object(symbol);
+ for(i = 0; i < array_capacity(names); i++)
+ {
+ F_SYMBOL *name = alien_offset(array_nth(names,i));
+ void *sym = ffi_dlsym(dll,name);
+
+ if(sym)
+ return sym;
+ }
+ }
+
+ return undefined_symbol;
+}
+
+/* Compute an address to store at a relocation */
+void relocate_code_block_step(F_REL *rel, F_CODE_BLOCK *compiled)
+{
+ CELL offset = rel->offset + (CELL)(compiled + 1);
+ F_ARRAY *literals = untag_object(compiled->literals);
+ F_FIXNUM absolute_value;
+
+ switch(REL_TYPE(rel))
+ {
+ case RT_PRIMITIVE:
+ absolute_value = (CELL)primitives[REL_ARGUMENT(rel)];
+ break;
+ case RT_DLSYM:
+ absolute_value = (CELL)get_rel_symbol(rel,literals);
+ break;
+ case RT_IMMEDIATE:
+ absolute_value = array_nth(literals,REL_ARGUMENT(rel));
+ break;
+ case RT_XT:
+ absolute_value = object_xt(array_nth(literals,REL_ARGUMENT(rel)));
+ break;
+ case RT_HERE:
+ absolute_value = rel->offset + (CELL)(compiled + 1) + (short)REL_ARGUMENT(rel);
+ break;
+ case RT_LABEL:
+ absolute_value = (CELL)(compiled + 1) + REL_ARGUMENT(rel);
+ break;
+ case RT_STACK_CHAIN:
+ absolute_value = (CELL)&stack_chain;
+ break;
+ default:
+ critical_error("Bad rel type",rel->type);
+ return; /* Can't happen */
+ }
+
+ store_address_in_code_block(REL_CLASS(rel),offset,absolute_value);
+}
+
+/* Perform all fixups on a code block */
+void relocate_code_block(F_CODE_BLOCK *compiled)
+{
+ compiled->last_scan = NURSERY;
+ compiled->needs_fixup = false;
+ iterate_relocations(compiled,relocate_code_block_step);
+ flush_icache_for(compiled);
+}
+
+/* Fixup labels. This is done at compile time, not image load time */
+void fixup_labels(F_ARRAY *labels, CELL code_format, F_CODE_BLOCK *compiled)
+{
+ CELL i;
+ CELL size = array_capacity(labels);
+
+ for(i = 0; i < size; i += 3)
+ {
+ CELL class = to_fixnum(array_nth(labels,i));
+ CELL offset = to_fixnum(array_nth(labels,i + 1));
+ CELL target = to_fixnum(array_nth(labels,i + 2));
+
+ store_address_in_code_block(class,
+ offset + (CELL)(compiled + 1),
+ target + (CELL)(compiled + 1));
+ }
+}
+
+/* Write a sequence of integers to memory, with 'format' bytes per integer */
+void deposit_integers(CELL here, F_ARRAY *array, CELL format)
+{
+ CELL count = array_capacity(array);
+ CELL i;
+
+ for(i = 0; i < count; i++)
+ {
+ F_FIXNUM value = to_fixnum(array_nth(array,i));
+ if(format == 1)
+ bput(here + i,value);
+ else if(format == sizeof(unsigned int))
+ *(unsigned int *)(here + format * i) = value;
+ else if(format == sizeof(CELL))
+ *(CELL *)(here + format * i) = value;
+ else
+ critical_error("Bad format in deposit_integers()",format);
+ }
+}
+
+bool stack_traces_p(void)
+{
+ return to_boolean(userenv[STACK_TRACES_ENV]);
+}
+
+CELL compiled_code_format(void)
+{
+ return untag_fixnum_fast(userenv[JIT_CODE_FORMAT]);
+}
+
+/* Might GC */
+void *allot_code_block(CELL size)
+{
+ void *start = heap_allot(&code_heap,size);
+
+ /* If allocation failed, do a code GC */
+ if(start == NULL)
+ {
+ gc();
+ start = heap_allot(&code_heap,size);
+
+ /* Insufficient room even after code GC, give up */
+ if(start == NULL)
+ {
+ CELL used, total_free, max_free;
+ heap_usage(&code_heap,&used,&total_free,&max_free);
+
+ print_string("Code heap stats:\n");
+ print_string("Used: "); print_cell(used); nl();
+ print_string("Total free space: "); print_cell(total_free); nl();
+ print_string("Largest free block: "); print_cell(max_free); nl();
+ fatal_error("Out of memory in add-compiled-block",0);
+ }
+ }
+
+ return start;
+}
+
+/* Might GC */
+F_CODE_BLOCK *add_compiled_block(
+ CELL type,
+ F_ARRAY *code,
+ F_ARRAY *labels,
+ CELL relocation,
+ CELL literals)
+{
+ CELL code_format = compiled_code_format();
+ CELL code_length = align8(array_capacity(code) * code_format);
+
+ REGISTER_ROOT(literals);
+ REGISTER_ROOT(relocation);
+ REGISTER_UNTAGGED(code);
+ REGISTER_UNTAGGED(labels);
+
+ F_CODE_BLOCK *compiled = allot_code_block(sizeof(F_CODE_BLOCK) + code_length);
+
+ UNREGISTER_UNTAGGED(labels);
+ UNREGISTER_UNTAGGED(code);
+ UNREGISTER_ROOT(relocation);
+ UNREGISTER_ROOT(literals);
+
+ /* compiled header */
+ compiled->type = type;
+ compiled->last_scan = NURSERY;
+ compiled->needs_fixup = true;
+ compiled->code_length = code_length;
+ compiled->literals = literals;
+ compiled->relocation = relocation;
+
+ /* code */
+ deposit_integers((CELL)(compiled + 1),code,code_format);
+
+ /* fixup labels */
+ if(labels) fixup_labels(labels,code_format,compiled);
+
+ /* next time we do a minor GC, we have to scan the code heap for
+ literals */
+ last_code_heap_scan = NURSERY;
+
+ return compiled;
+}
--- /dev/null
+typedef enum {
+ /* arg is a primitive number */
+ RT_PRIMITIVE,
+ /* arg is a literal table index, holding an array pair (symbol/dll) */
+ RT_DLSYM,
+ /* a pointer to a compiled word reference */
+ RT_DISPATCH,
+ /* a compiled word reference */
+ RT_XT,
+ /* current offset */
+ RT_HERE,
+ /* a local label */
+ RT_LABEL,
+ /* immediate literal */
+ RT_IMMEDIATE,
+ /* address of stack_chain var */
+ RT_STACK_CHAIN
+} F_RELTYPE;
+
+typedef enum {
+ /* absolute address in a 64-bit location */
+ RC_ABSOLUTE_CELL,
+ /* absolute address in a 32-bit location */
+ RC_ABSOLUTE,
+ /* relative address in a 32-bit location */
+ RC_RELATIVE,
+ /* relative address in a PowerPC LIS/ORI sequence */
+ RC_ABSOLUTE_PPC_2_2,
+ /* relative address in a PowerPC LWZ/STW/BC instruction */
+ RC_RELATIVE_PPC_2,
+ /* relative address in a PowerPC B/BL instruction */
+ RC_RELATIVE_PPC_3,
+ /* relative address in an ARM B/BL instruction */
+ RC_RELATIVE_ARM_3,
+ /* pointer to address in an ARM LDR/STR instruction */
+ RC_INDIRECT_ARM,
+ /* pointer to address in an ARM LDR/STR instruction offset by 8 bytes */
+ RC_INDIRECT_ARM_PC
+} F_RELCLASS;
+
+#define REL_RELATIVE_PPC_2_MASK 0xfffc
+#define REL_RELATIVE_PPC_3_MASK 0x3fffffc
+#define REL_INDIRECT_ARM_MASK 0xfff
+#define REL_RELATIVE_ARM_3_MASK 0xffffff
+
+/* the rel type is built like a cell to avoid endian-specific code in
+the compiler */
+#define REL_TYPE(r) ((r)->type & 0x000000ff)
+#define REL_CLASS(r) (((r)->type & 0x0000ff00) >> 8)
+#define REL_ARGUMENT(r) (((r)->type & 0xffff0000) >> 16)
+
+/* code relocation consists of a table of entries for each fixup */
+typedef struct {
+ unsigned int type;
+ unsigned int offset;
+} F_REL;
+
+void flush_icache_for(F_CODE_BLOCK *compiled);
+
+typedef void (*RELOCATION_ITERATOR)(F_REL *rel, F_CODE_BLOCK *compiled);
+
+void iterate_relocations(F_CODE_BLOCK *compiled, RELOCATION_ITERATOR iter);
+
+void store_address_in_code_block(CELL class, CELL offset, F_FIXNUM absolute_value);
+
+void relocate_code_block(F_CODE_BLOCK *compiled);
+
+void update_literal_references(F_CODE_BLOCK *compiled);
+
+void copy_literal_references(F_CODE_BLOCK *compiled);
+
+void update_word_references(F_CODE_BLOCK *compiled);
+
+void mark_code_block(F_CODE_BLOCK *compiled);
+
+void mark_active_blocks(F_CONTEXT *stacks);
+
+void mark_object_code_block(CELL scan);
+
+void relocate_code_block(F_CODE_BLOCK *relocating);
+
+CELL compiled_code_format(void);
+
+bool stack_traces_p(void);
+
+F_CODE_BLOCK *add_compiled_block(
+ CELL type,
+ F_ARRAY *code,
+ F_ARRAY *labels,
+ CELL relocation,
+ CELL literals);
heap->free_list = NULL;
}
-/* Allocate a code heap during startup */
-void init_code_heap(CELL size)
-{
- new_heap(&code_heap,size);
-}
-
-bool in_code_heap_p(CELL ptr)
-{
- return (ptr >= code_heap.segment->start
- && ptr <= code_heap.segment->end);
-}
-
/* If there is no previous block, next_free becomes the head of the free list,
else its linked in */
INLINE void update_free_list(F_HEAP *heap, F_BLOCK *prev, F_BLOCK *next_free)
}
/* Allocate a block of memory from the mark and sweep GC heap */
-CELL heap_allot(F_HEAP *heap, CELL size)
+void *heap_allot(F_HEAP *heap, CELL size)
{
F_BLOCK *prev = NULL;
F_BLOCK *scan = heap->free_list;
/* this is our new block */
scan->status = B_ALLOCATED;
- return (CELL)(scan + 1);
+ return scan + 1;
}
- return 0;
+ return NULL;
+}
+
+void mark_block(F_BLOCK *block)
+{
+ /* If already marked, do nothing */
+ switch(block->status)
+ {
+ case B_MARKED:
+ return;
+ case B_ALLOCATED:
+ block->status = B_MARKED;
+ break;
+ default:
+ critical_error("Marking the wrong block",(CELL)block);
+ break;
+ }
}
-/* If in the middle of code GC, we have to grow the heap, GC restarts from
+/* If in the middle of code GC, we have to grow the heap, data GC restarts from
scratch, so we have to unmark any marked blocks. */
void unmark_marked(F_HEAP *heap)
{
return heap->segment->size;
}
-/* Apply a function to every code block */
-void iterate_code_heap(CODE_HEAP_ITERATOR iter)
-{
- F_BLOCK *scan = first_block(&code_heap);
-
- while(scan)
- {
- if(scan->status != B_FREE)
- iterate_code_heap_step(block_to_compiled(scan),iter);
- scan = next_block(&code_heap,scan);
- }
-}
-
-/* Copy all literals referenced from a code block to newspace */
-void collect_literals_step(F_COMPILED *compiled, CELL code_start, CELL literals_start)
-{
- if(collecting_gen >= compiled->last_scan)
- {
- CELL scan;
- CELL literal_end = literals_start + compiled->literals_length;
-
- if(collecting_accumulation_gen_p())
- compiled->last_scan = collecting_gen;
- else
- compiled->last_scan = collecting_gen + 1;
-
- for(scan = literals_start; scan < literal_end; scan += CELLS)
- copy_handle((CELL*)scan);
-
- if(compiled->relocation != F)
- {
- copy_handle(&compiled->relocation);
-
- F_BYTE_ARRAY *relocation = untag_object(compiled->relocation);
-
- F_REL *rel = (F_REL *)(relocation + 1);
- F_REL *rel_end = (F_REL *)((char *)rel + byte_array_capacity(relocation));
-
- while(rel < rel_end)
- {
- if(REL_TYPE(rel) == RT_IMMEDIATE)
- {
- CELL offset = rel->offset + code_start;
- F_FIXNUM absolute_value = get(CREF(literals_start,REL_ARGUMENT(rel)));
- apply_relocation(REL_CLASS(rel),offset,absolute_value);
- }
-
- rel++;
- }
- }
-
- flush_icache(code_start,literals_start - code_start);
- }
-}
-
-/* Copy literals referenced from all code blocks to newspace */
-void collect_literals(void)
-{
- iterate_code_heap(collect_literals_step);
-}
-
-/* Mark all XTs and literals referenced from a word XT */
-void recursive_mark(F_BLOCK *block)
-{
- /* If already marked, do nothing */
- switch(block->status)
- {
- case B_MARKED:
- return;
- case B_ALLOCATED:
- block->status = B_MARKED;
- break;
- default:
- critical_error("Marking the wrong block",(CELL)block);
- break;
- }
-
- F_COMPILED *compiled = block_to_compiled(block);
- iterate_code_heap_step(compiled,collect_literals_step);
-}
-
-/* Push the free space and total size of the code heap */
-void primitive_code_room(void)
-{
- CELL used, total_free, max_free;
- heap_usage(&code_heap,&used,&total_free,&max_free);
- dpush(tag_fixnum((code_heap.segment->size) / 1024));
- dpush(tag_fixnum(used / 1024));
- dpush(tag_fixnum(total_free / 1024));
- dpush(tag_fixnum(max_free / 1024));
-}
-
-/* Dump all code blocks for debugging */
-void dump_heap(F_HEAP *heap)
-{
- CELL size = 0;
-
- F_BLOCK *scan = first_block(heap);
-
- while(scan)
- {
- char *status;
- switch(scan->status)
- {
- case B_FREE:
- status = "free";
- break;
- case B_ALLOCATED:
- size += object_size(block_to_compiled(scan)->relocation);
- status = "allocated";
- break;
- case B_MARKED:
- size += object_size(block_to_compiled(scan)->relocation);
- status = "marked";
- break;
- default:
- status = "invalid";
- break;
- }
-
- print_cell_hex((CELL)scan); print_string(" ");
- print_cell_hex(scan->size); print_string(" ");
- print_string(status); print_string("\n");
-
- scan = next_block(heap,scan);
- }
-
- print_cell(size); print_string(" bytes of relocation data\n");
-}
-
/* Compute where each block is going to go, after compaction */
CELL compute_heap_forwarding(F_HEAP *heap)
{
return address - heap->segment->start;
}
-F_COMPILED *forward_xt(F_COMPILED *compiled)
-{
- return block_to_compiled(compiled_to_block(compiled)->forwarding);
-}
-
-void forward_frame_xt(F_STACK_FRAME *frame)
-{
- CELL offset = (CELL)FRAME_RETURN_ADDRESS(frame) - (CELL)frame_code(frame);
- F_COMPILED *forwarded = forward_xt(frame_code(frame));
- frame->xt = (XT)(forwarded + 1);
- FRAME_RETURN_ADDRESS(frame) = (XT)((CELL)forwarded + offset);
-}
-
-void forward_object_xts(void)
-{
- begin_scan();
-
- CELL obj;
-
- while((obj = next_object()) != F)
- {
- if(type_of(obj) == WORD_TYPE)
- {
- F_WORD *word = untag_object(obj);
-
- word->code = forward_xt(word->code);
- if(word->profiling)
- word->profiling = forward_xt(word->profiling);
- }
- else if(type_of(obj) == QUOTATION_TYPE)
- {
- F_QUOTATION *quot = untag_object(obj);
-
- if(quot->compiledp != F)
- quot->code = forward_xt(quot->code);
- }
- else if(type_of(obj) == CALLSTACK_TYPE)
- {
- F_CALLSTACK *stack = untag_object(obj);
- iterate_callstack_object(stack,forward_frame_xt);
- }
- }
-
- /* End the heap scan */
- gc_off = false;
-}
-
-/* Set the XT fields now that the heap has been compacted */
-void fixup_object_xts(void)
-{
- begin_scan();
-
- CELL obj;
-
- while((obj = next_object()) != F)
- {
- if(type_of(obj) == WORD_TYPE)
- {
- F_WORD *word = untag_object(obj);
- update_word_xt(word);
- }
- else if(type_of(obj) == QUOTATION_TYPE)
- {
- F_QUOTATION *quot = untag_object(obj);
-
- if(quot->compiledp != F)
- set_quot_xt(quot,quot->code);
- }
- }
-
- /* End the heap scan */
- gc_off = false;
-}
-
void compact_heap(F_HEAP *heap)
{
F_BLOCK *scan = first_block(heap);
scan = next;
}
}
-
-/* Move all free space to the end of the code heap. This is not very efficient,
-since it makes several passes over the code and data heaps, but we only ever
-do this before saving a deployed image and exiting, so performaance is not
-critical here */
-void compact_code_heap(void)
-{
- /* Free all unreachable code blocks */
- gc();
-
- /* Figure out where the code heap blocks are going to end up */
- CELL size = compute_heap_forwarding(&code_heap);
-
- /* Update word and quotation code pointers */
- forward_object_xts();
-
- /* Actually perform the compaction */
- compact_heap(&code_heap);
-
- /* Update word and quotation XTs */
- fixup_object_xts();
-
- /* Now update the free list; there will be a single free block at
- the end */
- build_free_list(&code_heap,size);
-}
void new_heap(F_HEAP *heap, CELL size);
void build_free_list(F_HEAP *heap, CELL size);
-CELL heap_allot(F_HEAP *heap, CELL size);
+void *heap_allot(F_HEAP *heap, CELL size);
+void mark_block(F_BLOCK *block);
void unmark_marked(F_HEAP *heap);
void free_unmarked(F_HEAP *heap);
void heap_usage(F_HEAP *heap, CELL *used, CELL *total_free, CELL *max_free);
CELL heap_size(F_HEAP *heap);
+CELL compute_heap_forwarding(F_HEAP *heap);
+void compact_heap(F_HEAP *heap);
INLINE F_BLOCK *next_block(F_HEAP *heap, F_BLOCK *block)
{
return (F_BLOCK *)next;
}
-/* compiled code */
-F_HEAP code_heap;
-
-typedef void (*CODE_HEAP_ITERATOR)(F_COMPILED *compiled, CELL code_start, CELL literals_start);
-
-INLINE void iterate_code_heap_step(F_COMPILED *compiled, CODE_HEAP_ITERATOR iter)
-{
- CELL code_start = (CELL)(compiled + 1);
- CELL literals_start = code_start + compiled->code_length;
-
- iter(compiled,code_start,literals_start);
-}
-
-INLINE F_BLOCK *compiled_to_block(F_COMPILED *compiled)
-{
- return (F_BLOCK *)compiled - 1;
-}
-
-INLINE F_COMPILED *block_to_compiled(F_BLOCK *block)
-{
- return (F_COMPILED *)(block + 1);
-}
-
INLINE F_BLOCK *first_block(F_HEAP *heap)
{
return (F_BLOCK *)heap->segment->start;
{
return (F_BLOCK *)heap->segment->end;
}
-
-void init_code_heap(CELL size);
-bool in_code_heap_p(CELL ptr);
-void iterate_code_heap(CODE_HEAP_ITERATOR iter);
-void collect_literals(void);
-void recursive_mark(F_BLOCK *block);
-void dump_heap(F_HEAP *heap);
-void compact_code_heap(void);
-
-void primitive_code_room(void);
#include "master.h"
-/* References to undefined symbols are patched up to call this function on
-image load */
-void undefined_symbol(void)
+/* Allocate a code heap during startup */
+void init_code_heap(CELL size)
{
- general_error(ERROR_UNDEFINED_SYMBOL,F,F,NULL);
+ new_heap(&code_heap,size);
}
-INLINE CELL get_literal(CELL literals_start, CELL num)
+bool in_code_heap_p(CELL ptr)
{
- return get(CREF(literals_start,num));
+ return (ptr >= code_heap.segment->start
+ && ptr <= code_heap.segment->end);
}
-/* Look up an external library symbol referenced by a compiled code block */
-void *get_rel_symbol(F_REL *rel, CELL literals_start)
+void set_word_code(F_WORD *word, F_CODE_BLOCK *compiled)
{
- CELL arg = REL_ARGUMENT(rel);
- CELL symbol = get_literal(literals_start,arg);
- CELL library = get_literal(literals_start,arg + 1);
-
- F_DLL *dll = (library == F ? NULL : untag_dll(library));
-
- if(dll != NULL && !dll->dll)
- return undefined_symbol;
-
- if(type_of(symbol) == BYTE_ARRAY_TYPE)
- {
- F_SYMBOL *name = alien_offset(symbol);
- void *sym = ffi_dlsym(dll,name);
-
- if(sym)
- return sym;
- }
- else if(type_of(symbol) == ARRAY_TYPE)
- {
- CELL i;
- F_ARRAY *names = untag_object(symbol);
- for(i = 0; i < array_capacity(names); i++)
- {
- F_SYMBOL *name = alien_offset(array_nth(names,i));
- void *sym = ffi_dlsym(dll,name);
-
- if(sym)
- return sym;
- }
- }
-
- return undefined_symbol;
-}
-
-/* Compute an address to store at a relocation */
-INLINE CELL compute_code_rel(F_REL *rel,
- CELL code_start, CELL literals_start)
-{
- CELL obj;
-
- switch(REL_TYPE(rel))
- {
- case RT_PRIMITIVE:
- return (CELL)primitives[REL_ARGUMENT(rel)];
- case RT_DLSYM:
- return (CELL)get_rel_symbol(rel,literals_start);
- case RT_IMMEDIATE:
- return get(CREF(literals_start,REL_ARGUMENT(rel)));
- case RT_XT:
- obj = get(CREF(literals_start,REL_ARGUMENT(rel)));
- if(type_of(obj) == WORD_TYPE)
- return (CELL)untag_word(obj)->xt;
- else
- return (CELL)untag_quotation(obj)->xt;
- case RT_HERE:
- return rel->offset + code_start + (short)REL_ARGUMENT(rel);
- case RT_LABEL:
- return code_start + REL_ARGUMENT(rel);
- case RT_STACK_CHAIN:
- return (CELL)&stack_chain;
- default:
- critical_error("Bad rel type",rel->type);
- return -1; /* Can't happen */
- }
-}
-
-/* Store a 32-bit value into a PowerPC LIS/ORI sequence */
-INLINE void reloc_set_2_2(CELL cell, CELL value)
-{
- put(cell - CELLS,((get(cell - CELLS) & ~0xffff) | ((value >> 16) & 0xffff)));
- put(cell,((get(cell) & ~0xffff) | (value & 0xffff)));
-}
-
-/* Store a value into a bitfield of a PowerPC instruction */
-INLINE void reloc_set_masked(CELL cell, F_FIXNUM value, CELL mask, F_FIXNUM shift)
-{
- /* This is unaccurate but good enough */
- F_FIXNUM test = (F_FIXNUM)mask >> 1;
- if(value <= -test || value >= test)
- critical_error("Value does not fit inside relocation",0);
-
- u32 original = *(u32*)cell;
- original &= ~mask;
- *(u32*)cell = (original | ((value >> shift) & mask));
-}
-
-/* Perform a fixup on a code block */
-void apply_relocation(CELL class, CELL offset, F_FIXNUM absolute_value)
-{
- F_FIXNUM relative_value = absolute_value - offset;
-
- switch(class)
- {
- case RC_ABSOLUTE_CELL:
- put(offset,absolute_value);
- break;
- case RC_ABSOLUTE:
- *(u32*)offset = absolute_value;
- break;
- case RC_RELATIVE:
- *(u32*)offset = relative_value - sizeof(u32);
- break;
- case RC_ABSOLUTE_PPC_2_2:
- reloc_set_2_2(offset,absolute_value);
- break;
- case RC_RELATIVE_PPC_2:
- reloc_set_masked(offset,relative_value,REL_RELATIVE_PPC_2_MASK,0);
- break;
- case RC_RELATIVE_PPC_3:
- reloc_set_masked(offset,relative_value,REL_RELATIVE_PPC_3_MASK,0);
- break;
- case RC_RELATIVE_ARM_3:
- reloc_set_masked(offset,relative_value - CELLS * 2,
- REL_RELATIVE_ARM_3_MASK,2);
- break;
- case RC_INDIRECT_ARM:
- reloc_set_masked(offset,relative_value - CELLS,
- REL_INDIRECT_ARM_MASK,0);
- break;
- case RC_INDIRECT_ARM_PC:
- reloc_set_masked(offset,relative_value - CELLS * 2,
- REL_INDIRECT_ARM_MASK,0);
- break;
- default:
- critical_error("Bad rel class",class);
- break;
- }
-}
-
-/* Perform all fixups on a code block */
-void relocate_code_block(F_COMPILED *compiled, CELL code_start, CELL literals_start)
-{
- compiled->last_scan = NURSERY;
-
- if(compiled->relocation != F)
- {
- F_BYTE_ARRAY *relocation = untag_object(compiled->relocation);
-
- F_REL *rel = (F_REL *)(relocation + 1);
- F_REL *rel_end = (F_REL *)((char *)rel + byte_array_capacity(relocation));
-
- while(rel < rel_end)
- {
- CELL offset = rel->offset + code_start;
-
- F_FIXNUM absolute_value = compute_code_rel(
- rel,code_start,literals_start);
-
- apply_relocation(REL_CLASS(rel),offset,absolute_value);
-
- rel++;
- }
- }
-
- flush_icache(code_start,literals_start - code_start);
-}
-
-/* Fixup labels. This is done at compile time, not image load time */
-void fixup_labels(F_ARRAY *labels, CELL code_format, CELL code_start)
-{
- CELL i;
- CELL size = array_capacity(labels);
-
- for(i = 0; i < size; i += 3)
- {
- CELL class = to_fixnum(array_nth(labels,i));
- CELL offset = to_fixnum(array_nth(labels,i + 1));
- CELL target = to_fixnum(array_nth(labels,i + 2));
-
- apply_relocation(class,
- offset + code_start,
- target + code_start);
- }
-}
-
-/* Write a sequence of integers to memory, with 'format' bytes per integer */
-void deposit_integers(CELL here, F_ARRAY *array, CELL format)
-{
- CELL count = array_capacity(array);
- CELL i;
-
- for(i = 0; i < count; i++)
- {
- F_FIXNUM value = to_fixnum(array_nth(array,i));
- if(format == 1)
- bput(here + i,value);
- else if(format == sizeof(unsigned int))
- *(unsigned int *)(here + format * i) = value;
- else if(format == CELLS)
- put(CREF(here,i),value);
- else
- critical_error("Bad format in deposit_integers()",format);
- }
-}
+ if(compiled->type != WORD_TYPE)
+ critical_error("bad param to set_word_xt",(CELL)compiled);
-/* Write a sequence of tagged pointers to memory */
-void deposit_objects(CELL here, F_ARRAY *array)
-{
- memcpy((void*)here,array + 1,array_capacity(array) * CELLS);
+ word->code = compiled;
+ word->optimizedp = T;
}
-bool stack_traces_p(void)
+/* Allocates memory */
+void default_word_code(F_WORD *word, bool relocate)
{
- return to_boolean(userenv[STACK_TRACES_ENV]);
-}
+ REGISTER_UNTAGGED(word);
+ jit_compile(word->def,relocate);
+ UNREGISTER_UNTAGGED(word);
-CELL compiled_code_format(void)
-{
- return untag_fixnum_fast(userenv[JIT_CODE_FORMAT]);
+ word->code = untag_quotation(word->def)->code;
+ word->optimizedp = F;
}
-CELL allot_code_block(CELL size)
+/* Apply a function to every code block */
+void iterate_code_heap(CODE_HEAP_ITERATOR iter)
{
- CELL start = heap_allot(&code_heap,size);
+ F_BLOCK *scan = first_block(&code_heap);
- /* If allocation failed, do a code GC */
- if(start == 0)
+ while(scan)
{
- gc();
- start = heap_allot(&code_heap,size);
-
- /* Insufficient room even after code GC, give up */
- if(start == 0)
- {
- CELL used, total_free, max_free;
- heap_usage(&code_heap,&used,&total_free,&max_free);
-
- print_string("Code heap stats:\n");
- print_string("Used: "); print_cell(used); nl();
- print_string("Total free space: "); print_cell(total_free); nl();
- print_string("Largest free block: "); print_cell(max_free); nl();
- fatal_error("Out of memory in add-compiled-block",0);
- }
+ if(scan->status != B_FREE)
+ iter(block_to_compiled(scan));
+ scan = next_block(&code_heap,scan);
}
-
- return start;
}
-/* Might GC */
-F_COMPILED *add_compiled_block(
- CELL type,
- F_ARRAY *code,
- F_ARRAY *labels,
- CELL relocation,
- F_ARRAY *literals)
+/* Copy literals referenced from all code blocks to newspace. Only for
+aging and nursery collections */
+void copy_code_heap_roots(void)
{
- CELL code_format = compiled_code_format();
-
- CELL code_length = align8(array_capacity(code) * code_format);
- CELL literals_length = array_capacity(literals) * CELLS;
-
- REGISTER_ROOT(relocation);
- REGISTER_UNTAGGED(code);
- REGISTER_UNTAGGED(labels);
- REGISTER_UNTAGGED(literals);
-
- CELL here = allot_code_block(sizeof(F_COMPILED) + code_length + literals_length);
-
- UNREGISTER_UNTAGGED(literals);
- UNREGISTER_UNTAGGED(labels);
- UNREGISTER_UNTAGGED(code);
- UNREGISTER_ROOT(relocation);
-
- /* compiled header */
- F_COMPILED *header = (void *)here;
- header->type = type;
- header->last_scan = NURSERY;
- header->code_length = code_length;
- header->literals_length = literals_length;
- header->relocation = relocation;
-
- here += sizeof(F_COMPILED);
-
- CELL code_start = here;
-
- /* code */
- deposit_integers(here,code,code_format);
- here += code_length;
-
- /* literals */
- deposit_objects(here,literals);
- here += literals_length;
-
- /* fixup labels */
- if(labels)
- fixup_labels(labels,code_format,code_start);
-
- /* next time we do a minor GC, we have to scan the code heap for
- literals */
- last_code_heap_scan = NURSERY;
-
- return header;
+ iterate_code_heap(copy_literal_references);
}
-void set_word_code(F_WORD *word, F_COMPILED *compiled)
+/* Update literals referenced from all code blocks. Only for tenured
+collections, done at the end. */
+void update_code_heap_roots(void)
{
- if(compiled->type != WORD_TYPE)
- critical_error("bad param to set_word_xt",(CELL)compiled);
-
- word->code = compiled;
- word->compiledp = T;
+ iterate_code_heap(update_literal_references);
}
-/* Allocates memory */
-void default_word_code(F_WORD *word, bool relocate)
+/* Update pointers to words referenced from all code blocks. Only after
+defining a new word. */
+void update_code_heap_words(void)
{
- REGISTER_UNTAGGED(word);
- jit_compile(word->def,relocate);
- UNREGISTER_UNTAGGED(word);
-
- word->code = untag_quotation(word->def)->code;
- word->compiledp = F;
+ iterate_code_heap(update_word_references);
}
void primitive_modify_code_heap(void)
{
- bool rescan_code_heap = to_boolean(dpop());
F_ARRAY *alist = untag_array(dpop());
CELL count = untag_fixnum_fast(alist->capacity);
+ if(count == 0)
+ return;
+
CELL i;
for(i = 0; i < count; i++)
{
REGISTER_UNTAGGED(alist);
REGISTER_UNTAGGED(word);
- F_COMPILED *compiled = add_compiled_block(
+ F_CODE_BLOCK *compiled = add_compiled_block(
WORD_TYPE,
code,
labels,
relocation,
- literals);
+ tag_object(literals));
UNREGISTER_UNTAGGED(word);
UNREGISTER_UNTAGGED(alist);
UNREGISTER_UNTAGGED(alist);
}
- /* If there were any interned words in the set, we relocate all XT
- references in the entire code heap. But if all the words are
- uninterned, it is impossible that other words reference them, so we
- only have to relocate the new words. This makes compile-call much
- more efficient */
- if(rescan_code_heap)
- iterate_code_heap(relocate_code_block);
- else
+ update_code_heap_words();
+}
+
+/* Push the free space and total size of the code heap */
+void primitive_code_room(void)
+{
+ CELL used, total_free, max_free;
+ heap_usage(&code_heap,&used,&total_free,&max_free);
+ dpush(tag_fixnum((code_heap.segment->size) / 1024));
+ dpush(tag_fixnum(used / 1024));
+ dpush(tag_fixnum(total_free / 1024));
+ dpush(tag_fixnum(max_free / 1024));
+}
+
+F_CODE_BLOCK *forward_xt(F_CODE_BLOCK *compiled)
+{
+ return block_to_compiled(compiled_to_block(compiled)->forwarding);
+}
+
+void forward_frame_xt(F_STACK_FRAME *frame)
+{
+ CELL offset = (CELL)FRAME_RETURN_ADDRESS(frame) - (CELL)frame_code(frame);
+ F_CODE_BLOCK *forwarded = forward_xt(frame_code(frame));
+ frame->xt = (XT)(forwarded + 1);
+ FRAME_RETURN_ADDRESS(frame) = (XT)((CELL)forwarded + offset);
+}
+
+void forward_object_xts(void)
+{
+ begin_scan();
+
+ CELL obj;
+
+ while((obj = next_object()) != F)
{
- for(i = 0; i < count; i++)
+ if(type_of(obj) == WORD_TYPE)
{
- F_ARRAY *pair = untag_array(array_nth(alist,i));
- F_WORD *word = untag_word(array_nth(pair,0));
+ F_WORD *word = untag_object(obj);
- iterate_code_heap_step(word->code,relocate_code_block);
+ word->code = forward_xt(word->code);
+ if(word->profiling)
+ word->profiling = forward_xt(word->profiling);
+ }
+ else if(type_of(obj) == QUOTATION_TYPE)
+ {
+ F_QUOTATION *quot = untag_object(obj);
+
+ if(quot->compiledp != F)
+ quot->code = forward_xt(quot->code);
+ }
+ else if(type_of(obj) == CALLSTACK_TYPE)
+ {
+ F_CALLSTACK *stack = untag_object(obj);
+ iterate_callstack_object(stack,forward_frame_xt);
}
}
+
+ /* End the heap scan */
+ gc_off = false;
+}
+
+/* Set the XT fields now that the heap has been compacted */
+void fixup_object_xts(void)
+{
+ begin_scan();
+
+ CELL obj;
+
+ while((obj = next_object()) != F)
+ {
+ if(type_of(obj) == WORD_TYPE)
+ {
+ F_WORD *word = untag_object(obj);
+ update_word_xt(word);
+ }
+ else if(type_of(obj) == QUOTATION_TYPE)
+ {
+ F_QUOTATION *quot = untag_object(obj);
+
+ if(quot->compiledp != F)
+ set_quot_xt(quot,quot->code);
+ }
+ }
+
+ /* End the heap scan */
+ gc_off = false;
+}
+
+/* Move all free space to the end of the code heap. This is not very efficient,
+since it makes several passes over the code and data heaps, but we only ever
+do this before saving a deployed image and exiting, so performance is not
+critical here */
+void compact_code_heap(void)
+{
+ /* Free all unreachable code blocks */
+ gc();
+
+ /* Figure out where the code heap blocks are going to end up */
+ CELL size = compute_heap_forwarding(&code_heap);
+
+ /* Update word and quotation code pointers */
+ forward_object_xts();
+
+ /* Actually perform the compaction */
+ compact_heap(&code_heap);
+
+ /* Update word and quotation XTs */
+ fixup_object_xts();
+
+ /* Now update the free list; there will be a single free block at
+ the end */
+ build_free_list(&code_heap,size);
}
-typedef enum {
- /* arg is a primitive number */
- RT_PRIMITIVE,
- /* arg is a literal table index, holding an array pair (symbol/dll) */
- RT_DLSYM,
- /* a pointer to a compiled word reference */
- RT_DISPATCH,
- /* a compiled word reference */
- RT_XT,
- /* current offset */
- RT_HERE,
- /* a local label */
- RT_LABEL,
- /* immediate literal */
- RT_IMMEDIATE,
- /* address of stack_chain var */
- RT_STACK_CHAIN
-} F_RELTYPE;
+/* compiled code */
+F_HEAP code_heap;
-typedef enum {
- /* absolute address in a 64-bit location */
- RC_ABSOLUTE_CELL,
- /* absolute address in a 32-bit location */
- RC_ABSOLUTE,
- /* relative address in a 32-bit location */
- RC_RELATIVE,
- /* relative address in a PowerPC LIS/ORI sequence */
- RC_ABSOLUTE_PPC_2_2,
- /* relative address in a PowerPC LWZ/STW/BC instruction */
- RC_RELATIVE_PPC_2,
- /* relative address in a PowerPC B/BL instruction */
- RC_RELATIVE_PPC_3,
- /* relative address in an ARM B/BL instruction */
- RC_RELATIVE_ARM_3,
- /* pointer to address in an ARM LDR/STR instruction */
- RC_INDIRECT_ARM,
- /* pointer to address in an ARM LDR/STR instruction offset by 8 bytes */
- RC_INDIRECT_ARM_PC
-} F_RELCLASS;
+INLINE F_BLOCK *compiled_to_block(F_CODE_BLOCK *compiled)
+{
+ return (F_BLOCK *)compiled - 1;
+}
-#define REL_RELATIVE_PPC_2_MASK 0xfffc
-#define REL_RELATIVE_PPC_3_MASK 0x3fffffc
-#define REL_INDIRECT_ARM_MASK 0xfff
-#define REL_RELATIVE_ARM_3_MASK 0xffffff
+INLINE F_CODE_BLOCK *block_to_compiled(F_BLOCK *block)
+{
+ return (F_CODE_BLOCK *)(block + 1);
+}
-/* the rel type is built like a cell to avoid endian-specific code in
-the compiler */
-#define REL_TYPE(r) ((r)->type & 0x000000ff)
-#define REL_CLASS(r) (((r)->type & 0x0000ff00) >> 8)
-#define REL_ARGUMENT(r) (((r)->type & 0xffff0000) >> 16)
+void init_code_heap(CELL size);
-/* code relocation consists of a table of entries for each fixup */
-typedef struct {
- unsigned int type;
- unsigned int offset;
-} F_REL;
+bool in_code_heap_p(CELL ptr);
-#define CREF(array,i) ((CELL)(array) + CELLS * (i))
-
-void apply_relocation(CELL class, CELL offset, F_FIXNUM absolute_value);
+void default_word_code(F_WORD *word, bool relocate);
-void relocate_code_block(F_COMPILED *relocating, CELL code_start, CELL literals_start);
+void set_word_code(F_WORD *word, F_CODE_BLOCK *compiled);
-void default_word_code(F_WORD *word, bool relocate);
+typedef void (*CODE_HEAP_ITERATOR)(F_CODE_BLOCK *compiled);
-void set_word_code(F_WORD *word, F_COMPILED *compiled);
+void iterate_code_heap(CODE_HEAP_ITERATOR iter);
-F_COMPILED *add_compiled_block(
- CELL type,
- F_ARRAY *code,
- F_ARRAY *labels,
- CELL relocation,
- F_ARRAY *literals);
+void copy_code_heap_roots(void);
-CELL compiled_code_format(void);
-bool stack_traces_p(void);
+void update_code_heap_roots(void);
void primitive_modify_code_heap(void);
+
+void primitive_code_room(void);
+
+void compact_code_heap(void);
#include "master.h"
-CELL init_zone(F_ZONE *z, CELL size, CELL start)
-{
- z->size = size;
- z->start = z->here = start;
- z->end = start + size;
- return z->end;
-}
-
-void init_card_decks(void)
-{
- CELL start = align(data_heap->segment->start,DECK_SIZE);
- allot_markers_offset = (CELL)data_heap->allot_markers - (start >> CARD_BITS);
- cards_offset = (CELL)data_heap->cards - (start >> CARD_BITS);
- decks_offset = (CELL)data_heap->decks - (start >> DECK_BITS);
-}
-
-F_DATA_HEAP *alloc_data_heap(CELL gens,
- CELL young_size,
- CELL aging_size,
- CELL tenured_size)
-{
- young_size = align(young_size,DECK_SIZE);
- aging_size = align(aging_size,DECK_SIZE);
- tenured_size = align(tenured_size,DECK_SIZE);
-
- F_DATA_HEAP *data_heap = safe_malloc(sizeof(F_DATA_HEAP));
- data_heap->young_size = young_size;
- data_heap->aging_size = aging_size;
- data_heap->tenured_size = tenured_size;
- data_heap->gen_count = gens;
-
- CELL total_size;
- if(data_heap->gen_count == 2)
- total_size = young_size + 2 * tenured_size;
- else if(data_heap->gen_count == 3)
- total_size = young_size + 2 * aging_size + 2 * tenured_size;
- else
- {
- fatal_error("Invalid number of generations",data_heap->gen_count);
- return NULL; /* can't happen */
- }
-
- total_size += DECK_SIZE;
-
- data_heap->segment = alloc_segment(total_size);
-
- data_heap->generations = safe_malloc(sizeof(F_ZONE) * data_heap->gen_count);
- data_heap->semispaces = safe_malloc(sizeof(F_ZONE) * data_heap->gen_count);
-
- CELL cards_size = total_size >> CARD_BITS;
- data_heap->allot_markers = safe_malloc(cards_size);
- data_heap->allot_markers_end = data_heap->allot_markers + cards_size;
-
- data_heap->cards = safe_malloc(cards_size);
- data_heap->cards_end = data_heap->cards + cards_size;
-
- CELL decks_size = total_size >> DECK_BITS;
- data_heap->decks = safe_malloc(decks_size);
- data_heap->decks_end = data_heap->decks + decks_size;
-
- CELL alloter = align(data_heap->segment->start,DECK_SIZE);
-
- alloter = init_zone(&data_heap->generations[TENURED],tenured_size,alloter);
- alloter = init_zone(&data_heap->semispaces[TENURED],tenured_size,alloter);
-
- if(data_heap->gen_count == 3)
- {
- alloter = init_zone(&data_heap->generations[AGING],aging_size,alloter);
- alloter = init_zone(&data_heap->semispaces[AGING],aging_size,alloter);
- }
-
- if(data_heap->gen_count >= 2)
- {
- alloter = init_zone(&data_heap->generations[NURSERY],young_size,alloter);
- alloter = init_zone(&data_heap->semispaces[NURSERY],0,alloter);
- }
-
- if(data_heap->segment->end - alloter > DECK_SIZE)
- critical_error("Bug in alloc_data_heap",alloter);
-
- return data_heap;
-}
-
-F_DATA_HEAP *grow_data_heap(F_DATA_HEAP *data_heap, CELL requested_bytes)
-{
- CELL new_tenured_size = (data_heap->tenured_size * 2) + requested_bytes;
-
- return alloc_data_heap(data_heap->gen_count,
- data_heap->young_size,
- data_heap->aging_size,
- new_tenured_size);
-}
-
-void dealloc_data_heap(F_DATA_HEAP *data_heap)
-{
- dealloc_segment(data_heap->segment);
- free(data_heap->generations);
- free(data_heap->semispaces);
- free(data_heap->allot_markers);
- free(data_heap->cards);
- free(data_heap->decks);
- free(data_heap);
-}
-
-void clear_cards(CELL from, CELL to)
-{
- /* NOTE: reverse order due to heap layout. */
- F_CARD *first_card = ADDR_TO_CARD(data_heap->generations[to].start);
- F_CARD *last_card = ADDR_TO_CARD(data_heap->generations[from].end);
- memset(first_card,0,last_card - first_card);
-}
-
-void clear_decks(CELL from, CELL to)
-{
- /* NOTE: reverse order due to heap layout. */
- F_DECK *first_deck = ADDR_TO_DECK(data_heap->generations[to].start);
- F_DECK *last_deck = ADDR_TO_DECK(data_heap->generations[from].end);
- memset(first_deck,0,last_deck - first_deck);
-}
-
-void clear_allot_markers(CELL from, CELL to)
-{
- /* NOTE: reverse order due to heap layout. */
- F_CARD *first_card = ADDR_TO_ALLOT_MARKER(data_heap->generations[to].start);
- F_CARD *last_card = ADDR_TO_ALLOT_MARKER(data_heap->generations[from].end);
- memset(first_card,INVALID_ALLOT_MARKER,last_card - first_card);
-}
-
-void set_data_heap(F_DATA_HEAP *data_heap_)
-{
- data_heap = data_heap_;
- nursery = data_heap->generations[NURSERY];
- init_card_decks();
- clear_cards(NURSERY,TENURED);
- clear_decks(NURSERY,TENURED);
- clear_allot_markers(NURSERY,TENURED);
-}
-
-void gc_reset(void)
-{
- int i;
- for(i = 0; i < MAX_GEN_COUNT; i++)
- memset(&gc_stats[i],0,sizeof(F_GC_STATS));
-
- cards_scanned = 0;
- decks_scanned = 0;
- code_heap_scans = 0;
-}
-
-void init_data_heap(CELL gens,
- CELL young_size,
- CELL aging_size,
- CELL tenured_size,
- bool secure_gc_)
-{
- set_data_heap(alloc_data_heap(gens,young_size,aging_size,tenured_size));
-
- gc_locals_region = alloc_segment(getpagesize());
- gc_locals = gc_locals_region->start - CELLS;
-
- extra_roots_region = alloc_segment(getpagesize());
- extra_roots = extra_roots_region->start - CELLS;
-
- secure_gc = secure_gc_;
-
- gc_reset();
-}
-
-/* Size of the object pointed to by a tagged pointer */
-CELL object_size(CELL tagged)
-{
- if(immediate_p(tagged))
- return 0;
- else
- return untagged_object_size(UNTAG(tagged));
-}
-
-/* Size of the object pointed to by an untagged pointer */
-CELL untagged_object_size(CELL pointer)
-{
- return align8(unaligned_object_size(pointer));
-}
-
-/* Size of the data area of an object pointed to by an untagged pointer */
-CELL unaligned_object_size(CELL pointer)
-{
- F_TUPLE *tuple;
- F_TUPLE_LAYOUT *layout;
-
- switch(untag_header(get(pointer)))
- {
- case ARRAY_TYPE:
- case BIGNUM_TYPE:
- return array_size(array_capacity((F_ARRAY*)pointer));
- case BYTE_ARRAY_TYPE:
- return byte_array_size(
- byte_array_capacity((F_BYTE_ARRAY*)pointer));
- case STRING_TYPE:
- return string_size(string_capacity((F_STRING*)pointer));
- case TUPLE_TYPE:
- tuple = untag_object(pointer);
- layout = untag_object(tuple->layout);
- return tuple_size(layout);
- case QUOTATION_TYPE:
- return sizeof(F_QUOTATION);
- case WORD_TYPE:
- return sizeof(F_WORD);
- case RATIO_TYPE:
- return sizeof(F_RATIO);
- case FLOAT_TYPE:
- return sizeof(F_FLOAT);
- case COMPLEX_TYPE:
- return sizeof(F_COMPLEX);
- case DLL_TYPE:
- return sizeof(F_DLL);
- case ALIEN_TYPE:
- return sizeof(F_ALIEN);
- case WRAPPER_TYPE:
- return sizeof(F_WRAPPER);
- case CALLSTACK_TYPE:
- return callstack_size(
- untag_fixnum_fast(((F_CALLSTACK *)pointer)->length));
- default:
- critical_error("Invalid header",pointer);
- return -1; /* can't happen */
- }
-}
-
-void primitive_size(void)
-{
- box_unsigned_cell(object_size(dpop()));
-}
-
-/* Push memory usage statistics in data heap */
-void primitive_data_room(void)
-{
- F_ARRAY *a = allot_array(ARRAY_TYPE,data_heap->gen_count * 2,F);
- int gen;
-
- dpush(tag_fixnum((data_heap->cards_end - data_heap->cards) >> 10));
- dpush(tag_fixnum((data_heap->decks_end - data_heap->decks) >> 10));
-
- for(gen = 0; gen < data_heap->gen_count; gen++)
- {
- F_ZONE *z = (gen == NURSERY ? &nursery : &data_heap->generations[gen]);
- set_array_nth(a,gen * 2,tag_fixnum((z->end - z->here) >> 10));
- set_array_nth(a,gen * 2 + 1,tag_fixnum((z->size) >> 10));
- }
-
- dpush(tag_object(a));
-}
-
-/* Disables GC and activates next-object ( -- obj ) primitive */
-void begin_scan(void)
-{
- heap_scan_ptr = data_heap->generations[TENURED].start;
- gc_off = true;
-}
-
-void primitive_begin_scan(void)
-{
- gc();
- begin_scan();
-}
-
-CELL next_object(void)
-{
- if(!gc_off)
- general_error(ERROR_HEAP_SCAN,F,F,NULL);
-
- CELL value = get(heap_scan_ptr);
- CELL obj = heap_scan_ptr;
- CELL type;
-
- if(heap_scan_ptr >= data_heap->generations[TENURED].here)
- return F;
-
- type = untag_header(value);
- heap_scan_ptr += untagged_object_size(heap_scan_ptr);
-
- return RETAG(obj,type <= HEADER_TYPE ? type : OBJECT_TYPE);
-}
-
-/* Push object at heap scan cursor and advance; pushes f when done */
-void primitive_next_object(void)
-{
- dpush(next_object());
-}
-
-/* Re-enables GC */
-void primitive_end_scan(void)
-{
- gc_off = false;
-}
-
/* Scan all the objects in the card */
-void collect_card(F_CARD *ptr, CELL gen, CELL here)
+void copy_card(F_CARD *ptr, CELL gen, CELL here)
{
CELL card_scan = (CELL)CARD_TO_ADDR(ptr) + CARD_OFFSET(ptr);
CELL card_end = (CELL)CARD_TO_ADDR(ptr + 1);
if(here < card_end)
card_end = here;
- collect_next_loop(card_scan,&card_end);
+ copy_reachable_objects(card_scan,&card_end);
cards_scanned++;
}
-void collect_card_deck(F_DECK *deck, CELL gen, F_CARD mask, F_CARD unmask)
+void copy_card_deck(F_DECK *deck, CELL gen, F_CARD mask, F_CARD unmask)
{
F_CARD *first_card = DECK_TO_CARD(deck);
F_CARD *last_card = DECK_TO_CARD(deck + 1);
{
if(ptr[card] & mask)
{
- collect_card(&ptr[card],gen,here);
+ copy_card(&ptr[card],gen,here);
ptr[card] &= ~unmask;
}
}
}
/* Copy all newspace objects referenced from marked cards to the destination */
-void collect_gen_cards(CELL gen)
+void copy_gen_cards(CELL gen)
{
F_DECK *first_deck = ADDR_TO_DECK(data_heap->generations[gen].start);
F_DECK *last_deck = ADDR_TO_DECK(data_heap->generations[gen].end);
unmask = CARD_MARK_MASK;
else
{
- critical_error("bug in collect_gen_cards",gen);
+ critical_error("bug in copy_gen_cards",gen);
return;
}
}
}
else
{
- critical_error("bug in collect_gen_cards",gen);
+ critical_error("bug in copy_gen_cards",gen);
return;
}
{
if(*ptr & mask)
{
- collect_card_deck(ptr,gen,mask,unmask);
+ copy_card_deck(ptr,gen,mask,unmask);
*ptr &= ~unmask;
}
}
/* Scan cards in all generations older than the one being collected, copying
old->new references */
-void collect_cards(void)
+void copy_cards(void)
{
int i;
for(i = collecting_gen + 1; i < data_heap->gen_count; i++)
- collect_gen_cards(i);
+ copy_gen_cards(i);
}
/* Copy all tagged pointers in a range of memory */
-void collect_stack(F_SEGMENT *region, CELL top)
+void copy_stack_elements(F_SEGMENT *region, CELL top)
{
CELL ptr = region->start;
copy_handle((CELL*)ptr);
}
-void collect_stack_frame(F_STACK_FRAME *frame)
-{
- recursive_mark(compiled_to_block(frame_code(frame)));
-}
-
-/* The base parameter allows us to adjust for a heap-allocated
-callstack snapshot */
-void collect_callstack(F_CONTEXT *stacks)
-{
- if(collecting_gen == TENURED)
- {
- CELL top = (CELL)stacks->callstack_top;
- CELL bottom = (CELL)stacks->callstack_bottom;
-
- iterate_callstack(top,bottom,collect_stack_frame);
- }
-}
-
-void collect_gc_locals(void)
+void copy_registered_locals(void)
{
CELL ptr = gc_locals_region->start;
/* Copy roots over at the start of GC, namely various constants, stacks,
the user environment and extra roots registered with REGISTER_ROOT */
-void collect_roots(void)
+void copy_roots(void)
{
copy_handle(&T);
copy_handle(&bignum_zero);
copy_handle(&bignum_pos_one);
copy_handle(&bignum_neg_one);
- collect_gc_locals();
- collect_stack(extra_roots_region,extra_roots);
+ copy_registered_locals();
+ copy_stack_elements(extra_roots_region,extra_roots);
save_stacks();
F_CONTEXT *stacks = stack_chain;
while(stacks)
{
- collect_stack(stacks->datastack_region,stacks->datastack);
- collect_stack(stacks->retainstack_region,stacks->retainstack);
+ copy_stack_elements(stacks->datastack_region,stacks->datastack);
+ copy_stack_elements(stacks->retainstack_region,stacks->retainstack);
copy_handle(&stacks->catchstack_save);
copy_handle(&stacks->current_callback_save);
- collect_callstack(stacks);
+ mark_active_blocks(stacks);
stacks = stacks->next;
}
*handle = copy_object(pointer);
}
-/* The number of cells from the start of the object which should be scanned by
-the GC. Some types have a binary payload at the end (string, word, DLL) which
-we ignore. */
-CELL binary_payload_start(CELL pointer)
-{
- F_TUPLE *tuple;
- F_TUPLE_LAYOUT *layout;
-
- switch(untag_header(get(pointer)))
- {
- /* these objects do not refer to other objects at all */
- case FLOAT_TYPE:
- case BYTE_ARRAY_TYPE:
- case BIGNUM_TYPE:
- case CALLSTACK_TYPE:
- return 0;
- /* these objects have some binary data at the end */
- case WORD_TYPE:
- return sizeof(F_WORD) - CELLS * 3;
- case ALIEN_TYPE:
- return CELLS * 3;
- case DLL_TYPE:
- return CELLS * 2;
- case QUOTATION_TYPE:
- return sizeof(F_QUOTATION) - CELLS * 2;
- case STRING_TYPE:
- return sizeof(F_STRING);
- /* everything else consists entirely of pointers */
- case ARRAY_TYPE:
- return array_size(array_capacity((F_ARRAY*)pointer));
- case TUPLE_TYPE:
- tuple = untag_object(pointer);
- layout = untag_object(tuple->layout);
- return tuple_size(layout);
- case RATIO_TYPE:
- return sizeof(F_RATIO);
- case COMPLEX_TYPE:
- return sizeof(F_COMPLEX);
- case WRAPPER_TYPE:
- return sizeof(F_WRAPPER);
- default:
- critical_error("Invalid header",pointer);
- return -1; /* can't happen */
- }
-}
-
-void do_code_slots(CELL scan)
-{
- F_WORD *word;
- F_QUOTATION *quot;
- F_CALLSTACK *stack;
-
- switch(object_type(scan))
- {
- case WORD_TYPE:
- word = (F_WORD *)scan;
- recursive_mark(compiled_to_block(word->code));
- if(word->profiling)
- recursive_mark(compiled_to_block(word->profiling));
- break;
- case QUOTATION_TYPE:
- quot = (F_QUOTATION *)scan;
- if(quot->compiledp != F)
- recursive_mark(compiled_to_block(quot->code));
- break;
- case CALLSTACK_TYPE:
- stack = (F_CALLSTACK *)scan;
- iterate_callstack_object(stack,collect_stack_frame);
- break;
- }
-}
-
-CELL collect_next_nursery(CELL scan)
+CELL copy_next_from_nursery(CELL scan)
{
CELL *obj = (CELL *)scan;
CELL *end = (CELL *)(scan + binary_payload_start(scan));
return scan + untagged_object_size(scan);
}
-CELL collect_next_aging(CELL scan)
+CELL copy_next_from_aging(CELL scan)
{
CELL *obj = (CELL *)scan;
CELL *end = (CELL *)(scan + binary_payload_start(scan));
return scan + untagged_object_size(scan);
}
-/* This function is performance-critical */
-CELL collect_next_tenured(CELL scan)
+CELL copy_next_from_tenured(CELL scan)
{
CELL *obj = (CELL *)scan;
CELL *end = (CELL *)(scan + binary_payload_start(scan));
}
}
- do_code_slots(scan);
+ mark_object_code_block(scan);
return scan + untagged_object_size(scan);
}
-void collect_next_loop(CELL scan, CELL *end)
+void copy_reachable_objects(CELL scan, CELL *end)
{
if(HAVE_NURSERY_P && collecting_gen == NURSERY)
{
while(scan < *end)
- scan = collect_next_nursery(scan);
+ scan = copy_next_from_nursery(scan);
}
else if(HAVE_AGING_P && collecting_gen == AGING)
{
while(scan < *end)
- scan = collect_next_aging(scan);
+ scan = copy_next_from_aging(scan);
}
else if(collecting_gen == TENURED)
{
while(scan < *end)
- scan = collect_next_tenured(scan);
+ scan = copy_next_from_tenured(scan);
}
}
-INLINE void reset_generation(CELL i)
-{
- F_ZONE *z = (i == NURSERY ? &nursery : &data_heap->generations[i]);
-
- z->here = z->start;
- if(secure_gc)
- memset((void*)z->start,69,z->size);
-}
-
-/* After garbage collection, any generations which are now empty need to have
-their allocation pointers and cards reset. */
-void reset_generations(CELL from, CELL to)
-{
- CELL i;
- for(i = from; i <= to; i++)
- reset_generation(i);
-
- clear_cards(from,to);
- clear_decks(from,to);
- clear_allot_markers(from,to);
-}
-
/* Prepare to start copying reachable objects into an unused zone */
void begin_gc(CELL requested_bytes)
{
CELL scan = newspace->here;
/* collect objects referenced from stacks and environment */
- collect_roots();
+ copy_roots();
/* collect objects referenced from older generations */
- collect_cards();
+ copy_cards();
+ /* do some tracing */
+ copy_reachable_objects(scan,&newspace->here);
/* don't scan code heap unless it has pointers to this
generation or younger */
if(collecting_gen >= last_code_heap_scan)
{
- if(collecting_gen != TENURED)
- {
-
- /* if we are doing code GC, then we will copy over
- literals from any code block which gets marked as live.
- if we are not doing code GC, just consider all literals
- as roots. */
- code_heap_scans++;
-
- collect_literals();
- }
+ code_heap_scans++;
+
+ if(collecting_gen == TENURED)
+ update_code_heap_roots();
+ else
+ copy_code_heap_roots();
if(collecting_accumulation_gen_p())
last_code_heap_scan = collecting_gen;
last_code_heap_scan = collecting_gen + 1;
}
- collect_next_loop(scan,&newspace->here);
-
CELL gc_elapsed = (current_micros() - start);
end_gc(gc_elapsed);
dpush(stats);
}
-void primitive_gc_reset(void)
+void clear_gc_stats(void)
{
- gc_reset();
+ int i;
+ for(i = 0; i < MAX_GEN_COUNT; i++)
+ memset(&gc_stats[i],0,sizeof(F_GC_STATS));
+
+ cards_scanned = 0;
+ decks_scanned = 0;
+ code_heap_scans = 0;
+}
+
+void primitive_clear_gc_stats(void)
+{
+ clear_gc_stats();
}
void primitive_become(void)
compile_all_words();
}
-
-CELL find_all_words(void)
-{
- GROWABLE_ARRAY(words);
-
- begin_scan();
-
- CELL obj;
- while((obj = next_object()) != F)
- {
- if(type_of(obj) == WORD_TYPE)
- GROWABLE_ARRAY_ADD(words,obj);
- }
-
- /* End heap scan */
- gc_off = false;
-
- GROWABLE_ARRAY_TRIM(words);
-
- return words;
-}
-/* Set by the -S command line argument */
-bool secure_gc;
-
-/* set up guard pages to check for under/overflow.
-size must be a multiple of the page size */
-F_SEGMENT *alloc_segment(CELL size);
-void dealloc_segment(F_SEGMENT *block);
-
-CELL untagged_object_size(CELL pointer);
-CELL unaligned_object_size(CELL pointer);
-CELL object_size(CELL pointer);
-CELL binary_payload_start(CELL pointer);
-void begin_scan(void);
-CELL next_object(void);
-
-void primitive_data_room(void);
-void primitive_size(void);
-void primitive_begin_scan(void);
-void primitive_next_object(void);
-void primitive_end_scan(void);
-
void gc(void);
DLLEXPORT void minor_gc(void);
-/* generational copying GC divides memory into zones */
-typedef struct {
- /* allocation pointer is 'here'; its offset is hardcoded in the
- compiler backends, see core/compiler/.../allot.factor */
- CELL start;
- CELL here;
- CELL size;
- CELL end;
-} F_ZONE;
-
-typedef struct {
- F_SEGMENT *segment;
-
- CELL young_size;
- CELL aging_size;
- CELL tenured_size;
-
- CELL gen_count;
-
- F_ZONE *generations;
- F_ZONE* semispaces;
-
- CELL *allot_markers;
- CELL *allot_markers_end;
-
- CELL *cards;
- CELL *cards_end;
-
- CELL *decks;
- CELL *decks_end;
-} F_DATA_HEAP;
-
-F_DATA_HEAP *data_heap;
-
-/* card marking write barrier. a card is a byte storing a mark flag,
-and the offset (in cells) of the first object in the card.
-
-the mark flag is set by the write barrier when an object in the
-card has a slot written to.
-
-the offset of the first object is set by the allocator. */
-
-/* if CARD_POINTS_TO_NURSERY is set, CARD_POINTS_TO_AGING must also be set. */
-#define CARD_POINTS_TO_NURSERY 0x80
-#define CARD_POINTS_TO_AGING 0x40
-#define CARD_MARK_MASK (CARD_POINTS_TO_NURSERY | CARD_POINTS_TO_AGING)
-typedef u8 F_CARD;
-
-#define CARD_BITS 8
-#define CARD_SIZE (1<<CARD_BITS)
-#define ADDR_CARD_MASK (CARD_SIZE-1)
-
-DLLEXPORT CELL cards_offset;
-
-#define ADDR_TO_CARD(a) (F_CARD*)(((CELL)(a) >> CARD_BITS) + cards_offset)
-#define CARD_TO_ADDR(c) (CELL*)(((CELL)(c) - cards_offset)<<CARD_BITS)
-
-typedef u8 F_DECK;
-
-#define DECK_BITS (CARD_BITS + 10)
-#define DECK_SIZE (1<<DECK_BITS)
-#define ADDR_DECK_MASK (DECK_SIZE-1)
-
-DLLEXPORT CELL decks_offset;
-
-#define ADDR_TO_DECK(a) (F_DECK*)(((CELL)(a) >> DECK_BITS) + decks_offset)
-#define DECK_TO_ADDR(c) (CELL*)(((CELL)(c) - decks_offset) << DECK_BITS)
-
-#define DECK_TO_CARD(d) (F_CARD*)((((CELL)(d) - decks_offset) << (DECK_BITS - CARD_BITS)) + cards_offset)
-
-#define ADDR_TO_ALLOT_MARKER(a) (F_CARD*)(((CELL)(a) >> CARD_BITS) + allot_markers_offset)
-#define CARD_OFFSET(c) (*((c) - (CELL)data_heap->cards + (CELL)data_heap->allot_markers))
-
-#define INVALID_ALLOT_MARKER 0xff
-
-DLLEXPORT CELL allot_markers_offset;
-
-void init_card_decks(void);
-
-/* the write barrier must be called any time we are potentially storing a
-pointer from an older generation to a younger one */
-INLINE void write_barrier(CELL address)
-{
- *ADDR_TO_CARD(address) = CARD_MARK_MASK;
- *ADDR_TO_DECK(address) = CARD_MARK_MASK;
-}
-
-#define SLOT(obj,slot) (UNTAG(obj) + (slot) * CELLS)
-
-INLINE void set_slot(CELL obj, CELL slot, CELL value)
-{
- put(SLOT(obj,slot),value);
- write_barrier(obj);
-}
-
-/* we need to remember the first object allocated in the card */
-INLINE void allot_barrier(CELL address)
-{
- F_CARD *ptr = ADDR_TO_ALLOT_MARKER(address);
- if(*ptr == INVALID_ALLOT_MARKER)
- *ptr = (address & ADDR_CARD_MASK);
-}
-
-void clear_cards(CELL from, CELL to);
-void collect_cards(void);
-
-/* the 0th generation is where new objects are allocated. */
-#define NURSERY 0
-#define HAVE_NURSERY_P (data_heap->gen_count>1)
-/* where objects hang around */
-#define AGING (data_heap->gen_count-2)
-#define HAVE_AGING_P (data_heap->gen_count>2)
-/* the oldest generation */
-#define TENURED (data_heap->gen_count-1)
-
-#define MIN_GEN_COUNT 1
-#define MAX_GEN_COUNT 3
-
/* used during garbage collection only */
-F_ZONE *newspace;
-/* new objects are allocated here */
-DLLEXPORT F_ZONE nursery;
-
-INLINE bool in_zone(F_ZONE *z, CELL pointer)
-{
- return pointer >= z->start && pointer < z->end;
-}
+F_ZONE *newspace;
+bool performing_gc;
+CELL collecting_gen;
-CELL init_zone(F_ZONE *z, CELL size, CELL base);
+/* if true, we are collecting AGING space for the second time, so if it is still
+full, we go on to collect TENURED */
+bool collecting_aging_again;
-void init_data_heap(CELL gens,
- CELL young_size,
- CELL aging_size,
- CELL tenured_size,
- bool secure_gc_);
+/* in case a generation fills up in the middle of a gc, we jump back
+up to try collecting the next generation. */
+jmp_buf gc_jmp;
/* statistics */
typedef struct {
u64 decks_scanned;
CELL code_heap_scans;
-/* only meaningful during a GC */
-bool performing_gc;
-CELL collecting_gen;
-
-/* if true, we collecting AGING space for the second time, so if it is still
-full, we go on to collect TENURED */
-bool collecting_aging_again;
-
-INLINE bool collecting_accumulation_gen_p(void)
-{
- return ((HAVE_AGING_P
- && collecting_gen == AGING
- && !collecting_aging_again)
- || collecting_gen == TENURED);
-}
-
-/* What generation was being collected when collect_literals() was last
-called? Until the next call to primitive_add_compiled_block(), future
+/* What generation was being collected when copy_code_heap_roots() was last
+called? Until the next call to add_compiled_block(), future
collections of younger generations don't have to touch the code
heap. */
CELL last_code_heap_scan;
bool growing_data_heap;
F_DATA_HEAP *old_data_heap;
-/* Every object has a regular representation in the runtime, which makes GC
-much simpler. Every slot of the object until binary_payload_start is a pointer
-to some other object. */
-INLINE void do_slots(CELL obj, void (* iter)(CELL *))
+INLINE bool collecting_accumulation_gen_p(void)
{
- CELL scan = obj;
- CELL payload_start = binary_payload_start(obj);
- CELL end = obj + payload_start;
-
- scan += CELLS;
-
- while(scan < end)
- {
- iter((CELL *)scan);
- scan += CELLS;
- }
+ return ((HAVE_AGING_P
+ && collecting_gen == AGING
+ && !collecting_aging_again)
+ || collecting_gen == TENURED);
}
/* test if the pointer is in generation being collected, or a younger one. */
void copy_handle(CELL *handle);
-/* in case a generation fills up in the middle of a gc, we jump back
-up to try collecting the next generation. */
-jmp_buf gc_jmp;
-
-/* A heap walk allows useful things to be done, like finding all
-references to an object for debugging purposes. */
-CELL heap_scan_ptr;
-
-/* GC is off during heap walking */
-bool gc_off;
-
void garbage_collection(volatile CELL gen,
bool growing_data_heap_,
CELL requested_bytes);
-/* If a runtime function needs to call another function which potentially
-allocates memory, it must store any local variable references to Factor
-objects on the root stack */
-
-/* GC locals: stores addresses of pointers to objects. The GC updates these
-pointers, so you can do
-
-REGISTER_ROOT(some_local);
-
-... allocate memory ...
-
-foo(some_local);
-
-...
-
-UNREGISTER_ROOT(some_local); */
-F_SEGMENT *gc_locals_region;
-CELL gc_locals;
-
-DEFPUSHPOP(gc_local_,gc_locals)
-
-#define REGISTER_ROOT(obj) gc_local_push((CELL)&obj)
-#define UNREGISTER_ROOT(obj) \
- { \
- if(gc_local_pop() != (CELL)&obj) \
- critical_error("Mismatched REGISTER_ROOT/UNREGISTER_ROOT",0); \
- }
-
-/* Extra roots: stores pointers to objects in the heap. Requires extra work
-(you have to unregister before accessing the object) but more flexible. */
-F_SEGMENT *extra_roots_region;
-CELL extra_roots;
-
-DEFPUSHPOP(root_,extra_roots)
-
-#define REGISTER_UNTAGGED(obj) root_push(obj ? tag_object(obj) : 0)
-#define UNREGISTER_UNTAGGED(obj) obj = untag_object(root_pop())
-
-INLINE bool in_data_heap_p(CELL ptr)
-{
- return (ptr >= data_heap->segment->start
- && ptr <= data_heap->segment->end);
-}
-
-/* We ignore strings which point outside the data heap, but we might be given
-a char* which points inside the data heap, in which case it is a root, for
-example if we call unbox_char_string() the result is placed in a byte array */
-INLINE bool root_push_alien(const void *ptr)
-{
- if(in_data_heap_p((CELL)ptr))
- {
- F_BYTE_ARRAY *objptr = ((F_BYTE_ARRAY *)ptr) - 1;
- if(objptr->header == tag_header(BYTE_ARRAY_TYPE))
- {
- root_push(tag_object(objptr));
- return true;
- }
- }
-
- return false;
-}
-
-#define REGISTER_C_STRING(obj) \
- bool obj##_root = root_push_alien(obj)
-#define UNREGISTER_C_STRING(obj) \
- if(obj##_root) obj = alien_offset(root_pop())
-
-#define REGISTER_BIGNUM(obj) if(obj) root_push(tag_bignum(obj))
-#define UNREGISTER_BIGNUM(obj) if(obj) obj = (untag_object(root_pop()))
-
-INLINE void *allot_zone(F_ZONE *z, CELL a)
-{
- CELL h = z->here;
- z->here = h + align8(a);
- return (void*)h;
-}
-
/* We leave this many bytes free at the top of the nursery so that inline
allocation (which does not call GC because of possible roots in volatile
registers) does not run out of memory */
* It is up to the caller to fill in the object's fields in a meaningful
* fashion!
*/
-INLINE void* allot_object(CELL type, CELL a)
+INLINE void *allot_object(CELL type, CELL a)
{
CELL *object;
return object;
}
-void collect_next_loop(CELL scan, CELL *end);
+void copy_reachable_objects(CELL scan, CELL *end);
void primitive_gc(void);
void primitive_gc_stats(void);
-void primitive_gc_reset(void);
+void clear_gc_stats(void);
+void primitive_clear_gc_stats(void);
void primitive_become(void);
-
-CELL find_all_words(void);
--- /dev/null
+#include "master.h"
+
+CELL init_zone(F_ZONE *z, CELL size, CELL start)
+{
+ z->size = size;
+ z->start = z->here = start;
+ z->end = start + size;
+ return z->end;
+}
+
+void init_card_decks(void)
+{
+ CELL start = align(data_heap->segment->start,DECK_SIZE);
+ allot_markers_offset = (CELL)data_heap->allot_markers - (start >> CARD_BITS);
+ cards_offset = (CELL)data_heap->cards - (start >> CARD_BITS);
+ decks_offset = (CELL)data_heap->decks - (start >> DECK_BITS);
+}
+
+F_DATA_HEAP *alloc_data_heap(CELL gens,
+ CELL young_size,
+ CELL aging_size,
+ CELL tenured_size)
+{
+ young_size = align(young_size,DECK_SIZE);
+ aging_size = align(aging_size,DECK_SIZE);
+ tenured_size = align(tenured_size,DECK_SIZE);
+
+ F_DATA_HEAP *data_heap = safe_malloc(sizeof(F_DATA_HEAP));
+ data_heap->young_size = young_size;
+ data_heap->aging_size = aging_size;
+ data_heap->tenured_size = tenured_size;
+ data_heap->gen_count = gens;
+
+ CELL total_size;
+ if(data_heap->gen_count == 2)
+ total_size = young_size + 2 * tenured_size;
+ else if(data_heap->gen_count == 3)
+ total_size = young_size + 2 * aging_size + 2 * tenured_size;
+ else
+ {
+ fatal_error("Invalid number of generations",data_heap->gen_count);
+ return NULL; /* can't happen */
+ }
+
+ total_size += DECK_SIZE;
+
+ data_heap->segment = alloc_segment(total_size);
+
+ data_heap->generations = safe_malloc(sizeof(F_ZONE) * data_heap->gen_count);
+ data_heap->semispaces = safe_malloc(sizeof(F_ZONE) * data_heap->gen_count);
+
+ CELL cards_size = total_size >> CARD_BITS;
+ data_heap->allot_markers = safe_malloc(cards_size);
+ data_heap->allot_markers_end = data_heap->allot_markers + cards_size;
+
+ data_heap->cards = safe_malloc(cards_size);
+ data_heap->cards_end = data_heap->cards + cards_size;
+
+ CELL decks_size = total_size >> DECK_BITS;
+ data_heap->decks = safe_malloc(decks_size);
+ data_heap->decks_end = data_heap->decks + decks_size;
+
+ CELL alloter = align(data_heap->segment->start,DECK_SIZE);
+
+ alloter = init_zone(&data_heap->generations[TENURED],tenured_size,alloter);
+ alloter = init_zone(&data_heap->semispaces[TENURED],tenured_size,alloter);
+
+ if(data_heap->gen_count == 3)
+ {
+ alloter = init_zone(&data_heap->generations[AGING],aging_size,alloter);
+ alloter = init_zone(&data_heap->semispaces[AGING],aging_size,alloter);
+ }
+
+ if(data_heap->gen_count >= 2)
+ {
+ alloter = init_zone(&data_heap->generations[NURSERY],young_size,alloter);
+ alloter = init_zone(&data_heap->semispaces[NURSERY],0,alloter);
+ }
+
+ if(data_heap->segment->end - alloter > DECK_SIZE)
+ critical_error("Bug in alloc_data_heap",alloter);
+
+ return data_heap;
+}
+
+F_DATA_HEAP *grow_data_heap(F_DATA_HEAP *data_heap, CELL requested_bytes)
+{
+ CELL new_tenured_size = (data_heap->tenured_size * 2) + requested_bytes;
+
+ return alloc_data_heap(data_heap->gen_count,
+ data_heap->young_size,
+ data_heap->aging_size,
+ new_tenured_size);
+}
+
+void dealloc_data_heap(F_DATA_HEAP *data_heap)
+{
+ dealloc_segment(data_heap->segment);
+ free(data_heap->generations);
+ free(data_heap->semispaces);
+ free(data_heap->allot_markers);
+ free(data_heap->cards);
+ free(data_heap->decks);
+ free(data_heap);
+}
+
+void clear_cards(CELL from, CELL to)
+{
+ /* NOTE: reverse order due to heap layout. */
+ F_CARD *first_card = ADDR_TO_CARD(data_heap->generations[to].start);
+ F_CARD *last_card = ADDR_TO_CARD(data_heap->generations[from].end);
+ memset(first_card,0,last_card - first_card);
+}
+
+void clear_decks(CELL from, CELL to)
+{
+ /* NOTE: reverse order due to heap layout. */
+ F_DECK *first_deck = ADDR_TO_DECK(data_heap->generations[to].start);
+ F_DECK *last_deck = ADDR_TO_DECK(data_heap->generations[from].end);
+ memset(first_deck,0,last_deck - first_deck);
+}
+
+void clear_allot_markers(CELL from, CELL to)
+{
+ /* NOTE: reverse order due to heap layout. */
+ F_CARD *first_card = ADDR_TO_ALLOT_MARKER(data_heap->generations[to].start);
+ F_CARD *last_card = ADDR_TO_ALLOT_MARKER(data_heap->generations[from].end);
+ memset(first_card,INVALID_ALLOT_MARKER,last_card - first_card);
+}
+
+void reset_generation(CELL i)
+{
+ F_ZONE *z = (i == NURSERY ? &nursery : &data_heap->generations[i]);
+
+ z->here = z->start;
+ if(secure_gc)
+ memset((void*)z->start,69,z->size);
+}
+
+/* After garbage collection, any generations which are now empty need to have
+their allocation pointers and cards reset. */
+void reset_generations(CELL from, CELL to)
+{
+ CELL i;
+ for(i = from; i <= to; i++)
+ reset_generation(i);
+
+ clear_cards(from,to);
+ clear_decks(from,to);
+ clear_allot_markers(from,to);
+}
+
+void set_data_heap(F_DATA_HEAP *data_heap_)
+{
+ data_heap = data_heap_;
+ nursery = data_heap->generations[NURSERY];
+ init_card_decks();
+ clear_cards(NURSERY,TENURED);
+ clear_decks(NURSERY,TENURED);
+ clear_allot_markers(NURSERY,TENURED);
+}
+
+void init_data_heap(CELL gens,
+ CELL young_size,
+ CELL aging_size,
+ CELL tenured_size,
+ bool secure_gc_)
+{
+ set_data_heap(alloc_data_heap(gens,young_size,aging_size,tenured_size));
+
+ gc_locals_region = alloc_segment(getpagesize());
+ gc_locals = gc_locals_region->start - CELLS;
+
+ extra_roots_region = alloc_segment(getpagesize());
+ extra_roots = extra_roots_region->start - CELLS;
+
+ secure_gc = secure_gc_;
+}
+
+/* Size of the object pointed to by a tagged pointer */
+CELL object_size(CELL tagged)
+{
+ if(immediate_p(tagged))
+ return 0;
+ else
+ return untagged_object_size(UNTAG(tagged));
+}
+
+/* Size of the object pointed to by an untagged pointer */
+CELL untagged_object_size(CELL pointer)
+{
+ return align8(unaligned_object_size(pointer));
+}
+
+/* Size of the data area of an object pointed to by an untagged pointer */
+CELL unaligned_object_size(CELL pointer)
+{
+ F_TUPLE *tuple;
+ F_TUPLE_LAYOUT *layout;
+
+ switch(untag_header(get(pointer)))
+ {
+ case ARRAY_TYPE:
+ case BIGNUM_TYPE:
+ return array_size(array_capacity((F_ARRAY*)pointer));
+ case BYTE_ARRAY_TYPE:
+ return byte_array_size(
+ byte_array_capacity((F_BYTE_ARRAY*)pointer));
+ case STRING_TYPE:
+ return string_size(string_capacity((F_STRING*)pointer));
+ case TUPLE_TYPE:
+ tuple = untag_object(pointer);
+ layout = untag_object(tuple->layout);
+ return tuple_size(layout);
+ case QUOTATION_TYPE:
+ return sizeof(F_QUOTATION);
+ case WORD_TYPE:
+ return sizeof(F_WORD);
+ case RATIO_TYPE:
+ return sizeof(F_RATIO);
+ case FLOAT_TYPE:
+ return sizeof(F_FLOAT);
+ case COMPLEX_TYPE:
+ return sizeof(F_COMPLEX);
+ case DLL_TYPE:
+ return sizeof(F_DLL);
+ case ALIEN_TYPE:
+ return sizeof(F_ALIEN);
+ case WRAPPER_TYPE:
+ return sizeof(F_WRAPPER);
+ case CALLSTACK_TYPE:
+ return callstack_size(
+ untag_fixnum_fast(((F_CALLSTACK *)pointer)->length));
+ default:
+ critical_error("Invalid header",pointer);
+ return -1; /* can't happen */
+ }
+}
+
+void primitive_size(void)
+{
+ box_unsigned_cell(object_size(dpop()));
+}
+
+/* The number of cells from the start of the object which should be scanned by
+the GC. Some types have a binary payload at the end (string, word, DLL) which
+we ignore. */
+CELL binary_payload_start(CELL pointer)
+{
+ F_TUPLE *tuple;
+ F_TUPLE_LAYOUT *layout;
+
+ switch(untag_header(get(pointer)))
+ {
+ /* these objects do not refer to other objects at all */
+ case FLOAT_TYPE:
+ case BYTE_ARRAY_TYPE:
+ case BIGNUM_TYPE:
+ case CALLSTACK_TYPE:
+ return 0;
+ /* these objects have some binary data at the end */
+ case WORD_TYPE:
+ return sizeof(F_WORD) - CELLS * 3;
+ case ALIEN_TYPE:
+ return CELLS * 3;
+ case DLL_TYPE:
+ return CELLS * 2;
+ case QUOTATION_TYPE:
+ return sizeof(F_QUOTATION) - CELLS * 2;
+ case STRING_TYPE:
+ return sizeof(F_STRING);
+ /* everything else consists entirely of pointers */
+ case ARRAY_TYPE:
+ return array_size(array_capacity((F_ARRAY*)pointer));
+ case TUPLE_TYPE:
+ tuple = untag_object(pointer);
+ layout = untag_object(tuple->layout);
+ return tuple_size(layout);
+ case RATIO_TYPE:
+ return sizeof(F_RATIO);
+ case COMPLEX_TYPE:
+ return sizeof(F_COMPLEX);
+ case WRAPPER_TYPE:
+ return sizeof(F_WRAPPER);
+ default:
+ critical_error("Invalid header",pointer);
+ return -1; /* can't happen */
+ }
+}
+
+/* Push memory usage statistics in data heap */
+void primitive_data_room(void)
+{
+ F_ARRAY *a = allot_array(ARRAY_TYPE,data_heap->gen_count * 2,F);
+ int gen;
+
+ dpush(tag_fixnum((data_heap->cards_end - data_heap->cards) >> 10));
+ dpush(tag_fixnum((data_heap->decks_end - data_heap->decks) >> 10));
+
+ for(gen = 0; gen < data_heap->gen_count; gen++)
+ {
+ F_ZONE *z = (gen == NURSERY ? &nursery : &data_heap->generations[gen]);
+ set_array_nth(a,gen * 2,tag_fixnum((z->end - z->here) >> 10));
+ set_array_nth(a,gen * 2 + 1,tag_fixnum((z->size) >> 10));
+ }
+
+ dpush(tag_object(a));
+}
+
+/* Disables GC and activates next-object ( -- obj ) primitive */
+void begin_scan(void)
+{
+ heap_scan_ptr = data_heap->generations[TENURED].start;
+ gc_off = true;
+}
+
+void primitive_begin_scan(void)
+{
+ begin_scan();
+}
+
+CELL next_object(void)
+{
+ if(!gc_off)
+ general_error(ERROR_HEAP_SCAN,F,F,NULL);
+
+ CELL value = get(heap_scan_ptr);
+ CELL obj = heap_scan_ptr;
+ CELL type;
+
+ if(heap_scan_ptr >= data_heap->generations[TENURED].here)
+ return F;
+
+ type = untag_header(value);
+ heap_scan_ptr += untagged_object_size(heap_scan_ptr);
+
+ return RETAG(obj,type <= HEADER_TYPE ? type : OBJECT_TYPE);
+}
+
+/* Push object at heap scan cursor and advance; pushes f when done */
+void primitive_next_object(void)
+{
+ dpush(next_object());
+}
+
+/* Re-enables GC */
+void primitive_end_scan(void)
+{
+ gc_off = false;
+}
+
+CELL find_all_words(void)
+{
+ GROWABLE_ARRAY(words);
+
+ begin_scan();
+
+ CELL obj;
+ while((obj = next_object()) != F)
+ {
+ if(type_of(obj) == WORD_TYPE)
+ GROWABLE_ARRAY_ADD(words,obj);
+ }
+
+ /* End heap scan */
+ gc_off = false;
+
+ GROWABLE_ARRAY_TRIM(words);
+
+ return words;
+}
--- /dev/null
+/* Set by the -securegc command line argument */
+bool secure_gc;
+
+/* generational copying GC divides memory into zones */
+typedef struct {
+	/* allocation pointer is 'here'; its offset is hardcoded in the
+	compiler backends */
+ CELL start;
+ CELL here;
+ CELL size;
+ CELL end;
+} F_ZONE;
+
+typedef struct {
+ F_SEGMENT *segment;
+
+ CELL young_size;
+ CELL aging_size;
+ CELL tenured_size;
+
+ CELL gen_count;
+
+ F_ZONE *generations;
+ F_ZONE* semispaces;
+
+ CELL *allot_markers;
+ CELL *allot_markers_end;
+
+ CELL *cards;
+ CELL *cards_end;
+
+ CELL *decks;
+ CELL *decks_end;
+} F_DATA_HEAP;
+
+F_DATA_HEAP *data_heap;
+
+/* the 0th generation is where new objects are allocated. */
+#define NURSERY 0
+#define HAVE_NURSERY_P (data_heap->gen_count>1)
+/* where objects hang around */
+#define AGING (data_heap->gen_count-2)
+#define HAVE_AGING_P (data_heap->gen_count>2)
+/* the oldest generation */
+#define TENURED (data_heap->gen_count-1)
+
+#define MIN_GEN_COUNT 1
+#define MAX_GEN_COUNT 3
+
+/* new objects are allocated here */
+DLLEXPORT F_ZONE nursery;
+
+INLINE bool in_zone(F_ZONE *z, CELL pointer)
+{
+ return pointer >= z->start && pointer < z->end;
+}
+
+CELL init_zone(F_ZONE *z, CELL size, CELL base);
+
+void init_card_decks(void);
+
+F_DATA_HEAP *grow_data_heap(F_DATA_HEAP *data_heap, CELL requested_bytes);
+
+void dealloc_data_heap(F_DATA_HEAP *data_heap);
+
+void clear_cards(CELL from, CELL to);
+void clear_decks(CELL from, CELL to);
+void clear_allot_markers(CELL from, CELL to);
+void reset_generation(CELL i);
+void reset_generations(CELL from, CELL to);
+
+void set_data_heap(F_DATA_HEAP *data_heap_);
+
+void init_data_heap(CELL gens,
+ CELL young_size,
+ CELL aging_size,
+ CELL tenured_size,
+ bool secure_gc_);
+
+/* set up guard pages to check for under/overflow.
+size must be a multiple of the page size */
+F_SEGMENT *alloc_segment(CELL size);
+void dealloc_segment(F_SEGMENT *block);
+
+CELL untagged_object_size(CELL pointer);
+CELL unaligned_object_size(CELL pointer);
+CELL object_size(CELL pointer);
+CELL binary_payload_start(CELL pointer);
+
+void begin_scan(void);
+CELL next_object(void);
+
+void primitive_data_room(void);
+void primitive_size(void);
+
+void primitive_begin_scan(void);
+void primitive_next_object(void);
+void primitive_end_scan(void);
+
+/* A heap walk allows useful things to be done, like finding all
+references to an object for debugging purposes. */
+CELL heap_scan_ptr;
+
+/* GC is off during heap walking */
+bool gc_off;
+
+INLINE bool in_data_heap_p(CELL ptr)
+{
+ return (ptr >= data_heap->segment->start
+ && ptr <= data_heap->segment->end);
+}
+
+INLINE void *allot_zone(F_ZONE *z, CELL a)
+{
+ CELL h = z->here;
+ z->here = h + align8(a);
+ return (void*)h;
+}
+
+CELL find_all_words(void);
+
+/* Every object has a regular representation in the runtime, which makes GC
+much simpler. Every slot of the object until binary_payload_start is a pointer
+to some other object. */
+INLINE void do_slots(CELL obj, void (* iter)(CELL *))
+{
+ CELL scan = obj;
+ CELL payload_start = binary_payload_start(obj);
+ CELL end = obj + payload_start;
+
+ scan += CELLS;
+
+ while(scan < end)
+ {
+ iter((CELL *)scan);
+ scan += CELLS;
+ }
+}
gc_off = false;
}
-CELL look_for;
-
-void find_code_references_step(F_COMPILED *compiled, CELL code_start, CELL literals_start)
+/* Dump all code blocks for debugging */
+void dump_code_heap(void)
{
- CELL scan;
- CELL literal_end = literals_start + compiled->literals_length;
-
- for(scan = literals_start; scan < literal_end; scan += CELLS)
- {
- CELL code_start = (CELL)(compiled + 1);
- CELL literal_start = code_start + compiled->code_length;
+ CELL size = 0;
- CELL obj = get(literal_start);
+ F_BLOCK *scan = first_block(&code_heap);
- if(look_for == get(scan))
+ while(scan)
+ {
+ char *status;
+ switch(scan->status)
{
- print_cell_hex_pad(obj);
- print_string(" ");
- print_nested_obj(obj,2);
- nl();
+ case B_FREE:
+ status = "free";
+ break;
+ case B_ALLOCATED:
+ size += object_size(block_to_compiled(scan)->relocation);
+ status = "allocated";
+ break;
+ case B_MARKED:
+ size += object_size(block_to_compiled(scan)->relocation);
+ status = "marked";
+ break;
+ default:
+ status = "invalid";
+ break;
}
- }
-}
-void find_code_references(CELL look_for_)
-{
- look_for = look_for_;
- iterate_code_heap(find_code_references_step);
+ print_cell_hex((CELL)scan); print_string(" ");
+ print_cell_hex(scan->size); print_string(" ");
+ print_string(status); print_string("\n");
+
+ scan = next_block(&code_heap,scan);
+ }
+
+ print_cell(size); print_string(" bytes of relocation data\n");
}
void factorbug(void)
CELL addr = read_cell_hex();
print_string("Data heap references:\n");
find_data_references(addr);
- print_string("Code heap references:\n");
- find_code_references(addr);
nl();
}
else if(strcmp(cmd,"words") == 0)
dpush(addr);
}
else if(strcmp(cmd,"code") == 0)
- dump_heap(&code_heap);
+ dump_code_heap();
else
print_string("unknown command\n");
}
void init_parameters_from_args(F_PARAMETERS *p, int argc, F_CHAR **argv)
{
default_parameters(p);
- const F_CHAR *executable_path = vm_executable_path();
- p->executable_path = executable_path ? executable_path : argv[0];
+ p->executable_path = argv[0];
int i = 0;
/* OS-specific initialization */
early_init();
+ const F_CHAR *executable_path = vm_executable_path();
+
+ if(executable_path)
+ p->executable_path = executable_path;
+
if(p->image_path == NULL)
p->image_path = default_image_path();
p->tenured_size,
p->secure_gc);
+ clear_gc_stats();
+
F_ZONE *tenured = &data_heap->generations[TENURED];
F_FIXNUM bytes_read = fread((void*)tenured->start,1,h->data_size,file);
}
}
-void fixup_code_block(F_COMPILED *compiled, CELL code_start, CELL literals_start)
+void fixup_code_block(F_CODE_BLOCK *compiled)
{
/* relocate literal table data */
- CELL scan;
- CELL literal_end = literals_start + compiled->literals_length;
-
data_fixup(&compiled->relocation);
+ data_fixup(&compiled->literals);
- for(scan = literals_start; scan < literal_end; scan += CELLS)
- data_fixup((CELL*)scan);
-
- relocate_code_block(compiled,code_start,literals_start);
+ relocate_code_block(compiled);
}
void relocate_code()
{
char type; /* this is WORD_TYPE or QUOTATION_TYPE */
char last_scan; /* the youngest generation in which this block's literals may live */
+ char needs_fixup; /* is this a new block that needs full fixup? */
CELL code_length; /* # bytes */
- CELL literals_length; /* # bytes */
+ CELL literals; /* # bytes */
CELL relocation; /* tagged pointer to byte-array or f */
-} F_COMPILED;
+} F_CODE_BLOCK;
/* Assembly code makes assumptions about the layout of this struct */
typedef struct {
CELL def;
/* TAGGED property assoc for library code */
CELL props;
- /* TAGGED t or f, depending on if the word is compiled or not */
- CELL compiledp;
+	/* TAGGED t or f; t means the word was compiled with the optimizing
+	compiler, f means it was compiled with the non-optimizing compiler */
+ CELL optimizedp;
/* TAGGED call count for profiling */
CELL counter;
/* TAGGED machine code for sub-primitive */
/* UNTAGGED execution token: jump here to execute word */
XT xt;
/* UNTAGGED compiled code block */
- F_COMPILED *code;
+ F_CODE_BLOCK *code;
/* UNTAGGED profiler stub */
- F_COMPILED *profiling;
+ F_CODE_BLOCK *profiling;
} F_WORD;
/* Assembly code makes assumptions about the layout of this struct */
/* UNTAGGED */
XT xt;
/* UNTAGGED compiled code block */
- F_COMPILED *code;
+ F_CODE_BLOCK *code;
} F_QUOTATION;
/* Assembly code makes assumptions about the layout of this struct */
--- /dev/null
+/* If a runtime function needs to call another function which potentially
+allocates memory, it must store any local variable references to Factor
+objects on the root stack */
+
+/* GC locals: stores addresses of pointers to objects. The GC updates these
+pointers, so you can do
+
+REGISTER_ROOT(some_local);
+
+... allocate memory ...
+
+foo(some_local);
+
+...
+
+UNREGISTER_ROOT(some_local); */
+F_SEGMENT *gc_locals_region;
+CELL gc_locals;
+
+DEFPUSHPOP(gc_local_,gc_locals)
+
+#define REGISTER_ROOT(obj) gc_local_push((CELL)&obj)
+#define UNREGISTER_ROOT(obj) \
+ { \
+ if(gc_local_pop() != (CELL)&obj) \
+ critical_error("Mismatched REGISTER_ROOT/UNREGISTER_ROOT",0); \
+ }
+
+/* Extra roots: stores pointers to objects in the heap. Requires extra work
+(you have to unregister before accessing the object) but more flexible. */
+F_SEGMENT *extra_roots_region;
+CELL extra_roots;
+
+DEFPUSHPOP(root_,extra_roots)
+
+#define REGISTER_UNTAGGED(obj) root_push(obj ? tag_object(obj) : 0)
+#define UNREGISTER_UNTAGGED(obj) obj = untag_object(root_pop())
+
+/* We ignore strings which point outside the data heap, but we might be given
+a char* which points inside the data heap, in which case it is a root, for
+example if we call unbox_char_string() the result is placed in a byte array */
+INLINE bool root_push_alien(const void *ptr)
+{
+ if(in_data_heap_p((CELL)ptr))
+ {
+ F_BYTE_ARRAY *objptr = ((F_BYTE_ARRAY *)ptr) - 1;
+ if(objptr->header == tag_header(BYTE_ARRAY_TYPE))
+ {
+ root_push(tag_object(objptr));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+#define REGISTER_C_STRING(obj) \
+ bool obj##_root = root_push_alien(obj)
+#define UNREGISTER_C_STRING(obj) \
+ if(obj##_root) obj = alien_offset(root_pop())
+
+#define REGISTER_BIGNUM(obj) if(obj) root_push(tag_bignum(obj))
+#define UNREGISTER_BIGNUM(obj) if(obj) obj = (untag_object(root_pop()))
#include "errors.h"
#include "bignumint.h"
#include "bignum.h"
+#include "write_barrier.h"
+#include "data_heap.h"
+#include "local_roots.h"
#include "data_gc.h"
#include "debug.h"
#include "types.h"
#include "float_bits.h"
#include "io.h"
#include "code_gc.h"
+#include "code_block.h"
#include "code_heap.h"
#include "image.h"
#include "callstack.h"
primitive_resize_byte_array,
primitive_dll_validp,
primitive_unimplemented,
- primitive_gc_reset,
+ primitive_clear_gc_stats,
primitive_jit_compile,
primitive_load_locals,
};
#include "master.h"
/* Allocates memory */
-F_COMPILED *compile_profiling_stub(F_WORD *word)
+F_CODE_BLOCK *compile_profiling_stub(F_WORD *word)
{
CELL literals = allot_array_1(tag_object(word));
REGISTER_ROOT(literals);
untag_object(code),
NULL, /* no labels */
tag_object(relocation),
- untag_object(literals));
+ literals);
}
/* Allocates memory */
if(!word->profiling)
{
REGISTER_UNTAGGED(word);
- F_COMPILED *profiling = compile_profiling_stub(word);
+ F_CODE_BLOCK *profiling = compile_profiling_stub(word);
UNREGISTER_UNTAGGED(word);
word->profiling = profiling;
}
bool profiling_p;
void primitive_profiling(void);
-F_COMPILED *compile_profiling_stub(F_WORD *word);
+F_CODE_BLOCK *compile_profiling_stub(F_WORD *word);
void update_word_xt(F_WORD *word);
return false;
}
-void set_quot_xt(F_QUOTATION *quot, F_COMPILED *code)
+void set_quot_xt(F_QUOTATION *quot, F_CODE_BLOCK *code)
{
if(code->type != QUOTATION_TYPE)
critical_error("bad param to set_quot_xt",(CELL)code);
GROWABLE_ARRAY_TRIM(literals);
GROWABLE_BYTE_ARRAY_TRIM(relocation);
- F_COMPILED *compiled = add_compiled_block(
+ F_CODE_BLOCK *compiled = add_compiled_block(
QUOTATION_TYPE,
untag_object(code),
NULL,
relocation,
- untag_object(literals));
+ literals);
set_quot_xt(untag_object(quot),compiled);
if(relocate)
- iterate_code_heap_step(compiled,relocate_code_block);
+ relocate_code_block(compiled);
UNREGISTER_ROOT(literals);
UNREGISTER_ROOT(relocation);
{
F_WORD *word = untag_word(array_nth(untag_array(words),i));
REGISTER_UNTAGGED(word);
- if(word->compiledp == F)
+ if(word->optimizedp == F)
default_word_code(word,false);
UNREGISTER_UNTAGGED(word);
update_word_xt(word);
-void set_quot_xt(F_QUOTATION *quot, F_COMPILED *code);
+void set_quot_xt(F_QUOTATION *quot, F_CODE_BLOCK *code);
void jit_compile(CELL quot, bool relocate);
F_FASTCALL CELL lazy_jit_compile_impl(CELL quot, F_STACK_FRAME *stack);
F_FIXNUM quot_code_offset_to_scan(CELL quot, F_FIXNUM offset);
word->def = userenv[UNDEFINED_ENV];
word->props = F;
word->counter = tag_fixnum(0);
- word->compiledp = F;
+ word->optimizedp = F;
word->subprimitive = F;
word->profiling = NULL;
word->code = NULL;
UNREGISTER_UNTAGGED(word);
if(profiling_p)
- iterate_code_heap_step(word->profiling,relocate_code_block);
+ relocate_code_block(word->profiling);
return word;
}
void primitive_word_xt(void)
{
F_WORD *word = untag_word(dpop());
- F_COMPILED *code = (profiling_p ? word->profiling : word->code);
- dpush(allot_cell((CELL)code + sizeof(F_COMPILED)));
- dpush(allot_cell((CELL)code + sizeof(F_COMPILED) + code->code_length));
+ F_CODE_BLOCK *code = (profiling_p ? word->profiling : word->code);
+ dpush(allot_cell((CELL)code + sizeof(F_CODE_BLOCK)));
+ dpush(allot_cell((CELL)code + sizeof(F_CODE_BLOCK) + code->code_length));
}
void primitive_wrapper(void)
--- /dev/null
+/* card marking write barrier. a card is a byte storing a mark flag,
+and the offset (in cells) of the first object in the card.
+
+the mark flag is set by the write barrier when an object in the
+card has a slot written to.
+
+the offset of the first object is set by the allocator. */
+
+/* if CARD_POINTS_TO_NURSERY is set, CARD_POINTS_TO_AGING must also be set. */
+#define CARD_POINTS_TO_NURSERY 0x80
+#define CARD_POINTS_TO_AGING 0x40
+#define CARD_MARK_MASK (CARD_POINTS_TO_NURSERY | CARD_POINTS_TO_AGING)
+typedef u8 F_CARD;
+
+#define CARD_BITS 8
+#define CARD_SIZE (1<<CARD_BITS)
+#define ADDR_CARD_MASK (CARD_SIZE-1)
+
+DLLEXPORT CELL cards_offset;
+
+#define ADDR_TO_CARD(a) (F_CARD*)(((CELL)(a) >> CARD_BITS) + cards_offset)
+#define CARD_TO_ADDR(c) (CELL*)(((CELL)(c) - cards_offset)<<CARD_BITS)
+
+typedef u8 F_DECK;
+
+#define DECK_BITS (CARD_BITS + 10)
+#define DECK_SIZE (1<<DECK_BITS)
+#define ADDR_DECK_MASK (DECK_SIZE-1)
+
+DLLEXPORT CELL decks_offset;
+
+#define ADDR_TO_DECK(a) (F_DECK*)(((CELL)(a) >> DECK_BITS) + decks_offset)
+#define DECK_TO_ADDR(c) (CELL*)(((CELL)(c) - decks_offset) << DECK_BITS)
+
+#define DECK_TO_CARD(d) (F_CARD*)((((CELL)(d) - decks_offset) << (DECK_BITS - CARD_BITS)) + cards_offset)
+
+#define ADDR_TO_ALLOT_MARKER(a) (F_CARD*)(((CELL)(a) >> CARD_BITS) + allot_markers_offset)
+#define CARD_OFFSET(c) (*((c) - (CELL)data_heap->cards + (CELL)data_heap->allot_markers))
+
+#define INVALID_ALLOT_MARKER 0xff
+
+DLLEXPORT CELL allot_markers_offset;
+
+/* the write barrier must be called any time we are potentially storing a
+pointer from an older generation to a younger one */
+INLINE void write_barrier(CELL address)
+{
+ *ADDR_TO_CARD(address) = CARD_MARK_MASK;
+ *ADDR_TO_DECK(address) = CARD_MARK_MASK;
+}
+
+#define SLOT(obj,slot) (UNTAG(obj) + (slot) * CELLS)
+
+INLINE void set_slot(CELL obj, CELL slot, CELL value)
+{
+ put(SLOT(obj,slot),value);
+ write_barrier(obj);
+}
+
+/* we need to remember the first object allocated in the card */
+INLINE void allot_barrier(CELL address)
+{
+ F_CARD *ptr = ADDR_TO_ALLOT_MARKER(address);
+ if(*ptr == INVALID_ALLOT_MARKER)
+ *ptr = (address & ADDR_CARD_MASK);
+}