-! Copyright (C) 2008 Slava Pestov.
+! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: alien arrays alien.c-types alien.structs
-sequences math kernel namespaces fry libc cpu.architecture ;
+USING: alien alien.strings alien.c-types alien.accessors alien.structs
+arrays words sequences math kernel namespaces fry libc cpu.architecture
+io.encodings.utf8 io.encodings.utf16n ;
IN: alien.arrays
UNION: value-type array struct-type ;
M: value-type c-type-setter ( type -- quot )
[ c-type-getter ] [ c-type-unboxer-quot ] [ heap-size ] tri
'[ @ swap @ _ memcpy ] ;
+
+PREDICATE: string-type < pair
+ first2 [ "char*" = ] [ word? ] bi* and ;
+
+M: string-type c-type ;
+
+M: string-type c-type-class
+ drop object ;
+
+M: string-type heap-size
+ drop "void*" heap-size ;
+
+M: string-type c-type-align
+ drop "void*" c-type-align ;
+
+M: string-type c-type-stack-align?
+ drop "void*" c-type-stack-align? ;
+
+M: string-type unbox-parameter
+ drop "void*" unbox-parameter ;
+
+M: string-type unbox-return
+ drop "void*" unbox-return ;
+
+M: string-type box-parameter
+ drop "void*" box-parameter ;
+
+M: string-type box-return
+ drop "void*" box-return ;
+
+M: string-type stack-size
+ drop "void*" stack-size ;
+
+M: string-type c-type-reg-class
+ drop int-regs ;
+
+M: string-type c-type-boxer
+ drop "void*" c-type-boxer ;
+
+M: string-type c-type-unboxer
+ drop "void*" c-type-unboxer ;
+
+M: string-type c-type-boxer-quot
+ second '[ _ alien>string ] ;
+
+M: string-type c-type-unboxer-quot
+ second '[ _ string>alien ] ;
+
+M: string-type c-type-getter
+ drop [ alien-cell ] ;
+
+M: string-type c-type-setter
+ drop [ set-alien-cell ] ;
+
+{ "char*" utf8 } "char*" typedef
+"char*" "uchar*" typedef
+{ "char*" utf16n } "wchar_t*" typedef
+
IN: alien.c-types
USING: alien help.syntax help.markup libc kernel.private
-byte-arrays math strings hashtables alien.syntax
-debugger destructors ;
+byte-arrays math strings hashtables alien.syntax alien.strings sequences
+io.encodings.string debugger destructors ;
HELP: <c-type>
{ $values { "type" hashtable } }
{ $description "Defines a word " { $snippet "<" { $emphasis "name" } ">" } " with stack effect " { $snippet "( value -- array )" } ". This word allocates a byte array large enough to hold a value with C type " { $snippet "name" } ", and writes the value at the top of the stack to the array." }
{ $notes "This is an internal word called when defining C types, there is no need to call it on your own." } ;
+{ string>alien alien>string malloc-string } related-words
+
+HELP: malloc-string
+{ $values { "string" string } { "encoding" "an encoding descriptor" } { "alien" c-ptr } }
+{ $description "Encodes a string together with a trailing null code point using the given encoding, and stores the resulting bytes in a freshly-allocated unmanaged memory block." }
+{ $warning "Don't forget to deallocate the memory with a call to " { $link free } "." }
+{ $errors "Throws an error if one of the following conditions occurs:"
+ { $list
+ "the string contains null code points"
+ "the string contains characters not representable using the encoding specified"
+ "memory allocation fails"
+ }
+} ;
+
+ARTICLE: "c-strings" "C strings"
+"C string types are arrays with shape " { $snippet "{ \"char*\" encoding }" } ", where " { $snippet "encoding" } " is an encoding descriptor. The type " { $snippet "\"char*\"" } " is an alias for " { $snippet "{ \"char*\" utf8 }" } ". See " { $link "encodings-descriptors" } " for information about encoding descriptors."
+$nl
+"Passing a Factor string to a C function expecting a C string allocates a " { $link byte-array } " in the Factor heap; the string is then converted to the requested format and a raw pointer is passed to the function."
+$nl
+"If the conversion fails, for example if the string contains null bytes or characters with values higher than 255, a " { $link c-string-error. } " is thrown."
+$nl
+"Care must be taken if the C function expects a " { $snippet "char*" } " with a length in bytes, rather than a null-terminated " { $snippet "char*" } "; passing the result of calling " { $link length } " on the string object will not suffice. This is because a Factor string of " { $emphasis "n" } " characters will not necessarily encode to " { $emphasis "n" } " bytes. The correct idiom for C functions which take a string with a length is to first encode the string using " { $link encode } ", and then pass the resulting byte array together with the length of this byte array."
+$nl
+"Sometimes a C function has a parameter type of " { $snippet "void*" } ", and various data types, among them strings, can be passed in. In this case, strings are not automatically converted to aliens, and instead you must call one of these words:"
+{ $subsection string>alien }
+{ $subsection malloc-string }
+"The first allocates " { $link byte-array } "s, and the latter allocates manually-managed memory which is not moved by the garbage collector and has to be explicitly freed by calling " { $link free } ". See " { $link "byte-arrays-gc" } " for a discussion of the two approaches."
+$nl
+"A word to read strings from arbitrary addresses:"
+{ $subsection alien>string }
+"For example, if a C function returns a " { $snippet "char*" } " but stipulates that the caller must deallocate the memory afterward, you must define the function as returning " { $snippet "void*" } ", and call one of the above words before passing the pointer to " { $link free } "." ;
+
ARTICLE: "byte-arrays-gc" "Byte arrays and the garbage collector"
"The Factor garbage collector can move byte arrays around, and it is only safe to pass byte arrays to C functions if the garbage collector will not run while C code still has a reference to the data."
$nl
! See http://factorcode.org/license.txt for BSD license.
USING: byte-arrays arrays assocs kernel kernel.private libc math
namespaces make parser sequences strings words assocs splitting
-math.parser cpu.architecture alien alien.accessors quotations
-layouts system compiler.units io.files io.encodings.binary
-accessors combinators effects continuations fry classes ;
+math.parser cpu.architecture alien alien.accessors alien.strings
+quotations layouts system compiler.units io io.files
+io.encodings.binary io.streams.memory accessors combinators effects
+continuations fry classes ;
IN: alien.c-types
DEFER: <int>
: memory>byte-array ( alien len -- byte-array )
[ nip (byte-array) dup ] 2keep memcpy ;
+: malloc-string ( string encoding -- alien )
+ string>alien malloc-byte-array ;
+
+M: memory-stream stream-read
+ [
+ [ index>> ] [ alien>> ] bi <displaced-alien>
+ swap memory>byte-array
+ ] [ [ + ] change-index drop ] 2bi ;
+
: byte-array>memory ( byte-array base -- )
swap dup byte-length memcpy ;
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors alien assocs io.backend kernel namespaces ;
+USING: accessors alien alien.strings assocs io.backend kernel namespaces ;
IN: alien.libraries
+: dlopen ( path -- dll ) native-string>alien (dlopen) ;
+
+: dlsym ( name dll -- alien ) [ native-string>alien ] dip (dlsym) ;
+
SYMBOL: libraries
libraries [ H{ } clone ] initialize
library dup [ dll>> ] when ;
: add-library ( name path abi -- )
- <library> swap libraries get set-at ;
+ <library> swap libraries get set-at ;
\ No newline at end of file
+++ /dev/null
-USING: help.markup help.syntax strings byte-arrays alien libc
-debugger io.encodings.string sequences ;
-IN: alien.strings
-
-HELP: string>alien
-{ $values { "string" string } { "encoding" "an encoding descriptor" } { "byte-array" byte-array } }
-{ $description "Encodes a string together with a trailing null code point using the given encoding, and stores the resulting bytes in a freshly-allocated byte array." }
-{ $errors "Throws an error if the string contains null characters, or characters not representable in the given encoding." } ;
-
-{ string>alien alien>string malloc-string } related-words
-
-HELP: alien>string
-{ $values { "c-ptr" c-ptr } { "encoding" "an encoding descriptor" } { "string/f" "a string or " { $link f } } }
-{ $description "Reads a null-terminated C string from the specified address with the given encoding." } ;
-
-HELP: malloc-string
-{ $values { "string" string } { "encoding" "an encoding descriptor" } { "alien" c-ptr } }
-{ $description "Encodes a string together with a trailing null code point using the given encoding, and stores the resulting bytes in a freshly-allocated unmanaged memory block." }
-{ $warning "Don't forget to deallocate the memory with a call to " { $link free } "." }
-{ $errors "Throws an error if one of the following conditions occurs:"
- { $list
- "the string contains null code points"
- "the string contains characters not representable using the encoding specified"
- "memory allocation fails"
- }
-} ;
-
-HELP: string>symbol
-{ $values { "str" string } { "alien" alien } }
-{ $description "Converts the string to a format which is a valid symbol name for the Factor VM's compiled code linker. By performing this conversion ahead of time, the image loader can run without allocating memory."
-$nl
-"On Windows CE, symbols are represented as UCS2 strings, and on all other platforms they are ASCII strings." } ;
-
-ARTICLE: "c-strings" "C strings"
-"C string types are arrays with shape " { $snippet "{ \"char*\" encoding }" } ", where " { $snippet "encoding" } " is an encoding descriptor. The type " { $snippet "\"char*\"" } " is an alias for " { $snippet "{ \"char*\" utf8 }" } ". See " { $link "encodings-descriptors" } " for information about encoding descriptors."
-$nl
-"Passing a Factor string to a C function expecting a C string allocates a " { $link byte-array } " in the Factor heap; the string is then converted to the requested format and a raw pointer is passed to the function."
-$nl
-"If the conversion fails, for example if the string contains null bytes or characters with values higher than 255, a " { $link c-string-error. } " is thrown."
-$nl
-"Care must be taken if the C function expects a " { $snippet "char*" } " with a length in bytes, rather than a null-terminated " { $snippet "char*" } "; passing the result of calling " { $link length } " on the string object will not suffice. This is because a Factor string of " { $emphasis "n" } " characters will not necessarily encode to " { $emphasis "n" } " bytes. The correct idiom for C functions which take a string with a length is to first encode the string using " { $link encode } ", and then pass the resulting byte array together with the length of this byte array."
-$nl
-"Sometimes a C function has a parameter type of " { $snippet "void*" } ", and various data types, among them strings, can be passed in. In this case, strings are not automatically converted to aliens, and instead you must call one of these words:"
-{ $subsection string>alien }
-{ $subsection malloc-string }
-"The first allocates " { $link byte-array } "s, and the latter allocates manually-managed memory which is not moved by the garbage collector and has to be explicitly freed by calling " { $link free } ". See " { $link "byte-arrays-gc" } " for a discussion of the two approaches."
-$nl
-"A word to read strings from arbitrary addresses:"
-{ $subsection alien>string }
-"For example, if a C function returns a " { $snippet "char*" } " but stipulates that the caller must deallocate the memory afterward, you must define the function as returning " { $snippet "void*" } ", and call one of the above words before passing the pointer to " { $link free } "." ;
-
-ABOUT: "c-strings"
+++ /dev/null
-USING: alien.strings tools.test kernel libc
-io.encodings.8-bit io.encodings.utf8 io.encodings.utf16
-io.encodings.utf16n io.encodings.ascii alien io.encodings.string ;
-IN: alien.strings.tests
-
-[ "\u0000ff" ]
-[ "\u0000ff" latin1 string>alien latin1 alien>string ]
-unit-test
-
-[ "hello world" ]
-[ "hello world" latin1 string>alien latin1 alien>string ]
-unit-test
-
-[ "hello\u00abcdworld" ]
-[ "hello\u00abcdworld" utf16le string>alien utf16le alien>string ]
-unit-test
-
-[ t ] [ f expired? ] unit-test
-
-[ "hello world" ] [
- "hello world" ascii malloc-string
- dup ascii alien>string swap free
-] unit-test
-
-[ "hello world" ] [
- "hello world" utf16n malloc-string
- dup utf16n alien>string swap free
-] unit-test
-
-[ f ] [ f utf8 alien>string ] unit-test
-
-[ "hello" ] [ "hello" utf16 encode utf16 decode ] unit-test
-
-[ "hello" ] [ "hello" utf16 string>alien utf16 alien>string ] unit-test
+++ /dev/null
-! Copyright (C) 2008 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: arrays sequences kernel accessors math alien.accessors
-alien.c-types byte-arrays words io io.encodings
-io.encodings.utf8 io.streams.byte-array io.streams.memory system
-alien strings cpu.architecture fry vocabs.loader combinators ;
-IN: alien.strings
-
-GENERIC# alien>string 1 ( c-ptr encoding -- string/f )
-
-M: c-ptr alien>string
- [ <memory-stream> ] [ <decoder> ] bi*
- "\0" swap stream-read-until drop ;
-
-M: f alien>string
- drop ;
-
-ERROR: invalid-c-string string ;
-
-: check-string ( string -- )
- 0 over memq? [ invalid-c-string ] [ drop ] if ;
-
-GENERIC# string>alien 1 ( string encoding -- byte-array )
-
-M: c-ptr string>alien drop ;
-
-M: string string>alien
- over check-string
- <byte-writer>
- [ stream-write ]
- [ 0 swap stream-write1 ]
- [ stream>> >byte-array ]
- tri ;
-
-: malloc-string ( string encoding -- alien )
- string>alien malloc-byte-array ;
-
-PREDICATE: string-type < pair
- first2 [ "char*" = ] [ word? ] bi* and ;
-
-M: string-type c-type ;
-
-M: string-type c-type-class
- drop object ;
-
-M: string-type heap-size
- drop "void*" heap-size ;
-
-M: string-type c-type-align
- drop "void*" c-type-align ;
-
-M: string-type c-type-stack-align?
- drop "void*" c-type-stack-align? ;
-
-M: string-type unbox-parameter
- drop "void*" unbox-parameter ;
-
-M: string-type unbox-return
- drop "void*" unbox-return ;
-
-M: string-type box-parameter
- drop "void*" box-parameter ;
-
-M: string-type box-return
- drop "void*" box-return ;
-
-M: string-type stack-size
- drop "void*" stack-size ;
-
-M: string-type c-type-reg-class
- drop int-regs ;
-
-M: string-type c-type-boxer
- drop "void*" c-type-boxer ;
-
-M: string-type c-type-unboxer
- drop "void*" c-type-unboxer ;
-
-M: string-type c-type-boxer-quot
- second '[ _ alien>string ] ;
-
-M: string-type c-type-unboxer-quot
- second '[ _ string>alien ] ;
-
-M: string-type c-type-getter
- drop [ alien-cell ] ;
-
-M: string-type c-type-setter
- drop [ set-alien-cell ] ;
-
-HOOK: alien>native-string os ( alien -- string )
-
-HOOK: native-string>alien os ( string -- alien )
-
-: dll-path ( dll -- string )
- path>> alien>native-string ;
-
-: string>symbol ( str -- alien )
- dup string?
- [ native-string>alien ]
- [ [ native-string>alien ] map ] if ;
-
-{ "char*" utf8 } "char*" typedef
-"char*" "uchar*" typedef
-
-{
- { [ os windows? ] [ "alien.strings.windows" require ] }
- { [ os unix? ] [ "alien.strings.unix" require ] }
-} cond
+++ /dev/null
-Passing Factor strings as C strings and vice versa
+++ /dev/null
-Default string encoding on Unix
+++ /dev/null
-! Copyright (C) 2008 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: alien.strings io.encodings.utf8 system ;
-IN: alien.strings.unix
-
-M: unix alien>native-string utf8 alien>string ;
-
-M: unix native-string>alien utf8 string>alien ;
+++ /dev/null
-Default string encoding on Windows
+++ /dev/null
-unportable
+++ /dev/null
-! Copyright (C) 2008 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: alien.strings alien.c-types io.encodings.utf8
-io.encodings.utf16n system ;
-IN: alien.strings.windows
-
-M: windows alien>native-string utf16n alien>string ;
-
-M: wince native-string>alien utf16n string>alien ;
-
-M: winnt native-string>alien utf8 string>alien ;
-
-{ "char*" utf16n } "wchar_t*" typedef
"stage2: deployment mode" print
] [
"debugger" require
- "alien.prettyprint" require
"inspector" require
"tools.errors" require
"listener" require
+++ /dev/null
-USING: arrays byte-arrays help.markup help.syntax kernel combinators ;\r
-IN: byte-vectors\r
-\r
-ARTICLE: "byte-vectors" "Byte vectors"\r
-"The " { $vocab-link "byte-vectors" } " vocabulary implements resizable mutable sequence of unsigned bytes. Byte vectors implement the " { $link "sequence-protocol" } " and thus all " { $link "sequences" } " can be used with them."\r
-$nl\r
-"Byte vectors form a class:"\r
-{ $subsection byte-vector }\r
-{ $subsection byte-vector? }\r
-"Creating byte vectors:"\r
-{ $subsection >byte-vector }\r
-{ $subsection <byte-vector> }\r
-"Literal syntax:"\r
-{ $subsection POSTPONE: BV{ }\r
-"If you don't care about initial capacity, a more elegant way to create a new byte vector is to write:"\r
-{ $code "BV{ } clone" } ;\r
-\r
-ABOUT: "byte-vectors"\r
-\r
-HELP: byte-vector\r
-{ $description "The class of resizable byte vectors. See " { $link "byte-vectors" } " for information." } ;\r
-\r
-HELP: <byte-vector>\r
-{ $values { "n" "a positive integer specifying initial capacity" } { "byte-vector" byte-vector } }\r
-{ $description "Creates a new byte vector that can hold " { $snippet "n" } " bytes before resizing." } ;\r
-\r
-HELP: >byte-vector\r
-{ $values { "seq" "a sequence" } { "byte-vector" byte-vector } }\r
-{ $description "Outputs a freshly-allocated byte vector with the same elements as a given sequence." }\r
-{ $errors "Throws an error if the sequence contains elements other than integers." } ;\r
-\r
-HELP: BV{\r
-{ $syntax "BV{ elements... }" }\r
-{ $values { "elements" "a list of bytes" } }\r
-{ $description "Marks the beginning of a literal byte vector. Literal byte vectors are terminated by " { $link POSTPONE: } } "." } \r
-{ $examples { $code "BV{ 1 2 3 12 }" } } ;\r
+++ /dev/null
-IN: byte-vectors.tests\r
-USING: tools.test byte-vectors vectors sequences kernel\r
-prettyprint ;\r
-\r
-[ 0 ] [ 123 <byte-vector> length ] unit-test\r
-\r
-: do-it ( seq -- seq )\r
- 123 [ over push ] each ;\r
-\r
-[ t ] [\r
- 3 <byte-vector> do-it\r
- 3 <vector> do-it sequence=\r
-] unit-test\r
-\r
-[ t ] [ BV{ } byte-vector? ] unit-test\r
-\r
-[ "BV{ }" ] [ BV{ } unparse ] unit-test\r
+++ /dev/null
-! Copyright (C) 2008 Slava Pestov.\r
-! See http://factorcode.org/license.txt for BSD license.\r
-USING: arrays kernel kernel.private math sequences\r
-sequences.private growable byte-arrays accessors parser\r
-prettyprint.custom ;\r
-IN: byte-vectors\r
-\r
-TUPLE: byte-vector\r
-{ underlying byte-array }\r
-{ length array-capacity } ;\r
-\r
-: <byte-vector> ( n -- byte-vector )\r
- (byte-array) 0 byte-vector boa ; inline\r
-\r
-: >byte-vector ( seq -- byte-vector )\r
- T{ byte-vector f B{ } 0 } clone-like ;\r
-\r
-M: byte-vector like\r
- drop dup byte-vector? [\r
- dup byte-array?\r
- [ dup length byte-vector boa ] [ >byte-vector ] if\r
- ] unless ;\r
-\r
-M: byte-vector new-sequence\r
- drop [ (byte-array) ] [ >fixnum ] bi byte-vector boa ;\r
-\r
-M: byte-vector equal?\r
- over byte-vector? [ sequence= ] [ 2drop f ] if ;\r
-\r
-M: byte-array like\r
- #! If we have an byte-array, we're done.\r
- #! If we have a byte-vector, and it's at full capacity,\r
- #! we're done. Otherwise, call resize-byte-array, which is a\r
- #! relatively fast primitive.\r
- drop dup byte-array? [\r
- dup byte-vector? [\r
- [ length ] [ underlying>> ] bi\r
- 2dup length eq?\r
- [ nip ] [ resize-byte-array ] if\r
- ] [ >byte-array ] if\r
- ] unless ;\r
-\r
-M: byte-array new-resizable drop <byte-vector> ;\r
-\r
-SYNTAX: BV{ \ } [ >byte-vector ] parse-literal ;\r
-\r
-M: byte-vector pprint* pprint-object ;\r
-M: byte-vector pprint-delims drop \ BV{ \ } ;\r
-M: byte-vector >pprint-sequence ;\r
-\r
-INSTANCE: byte-vector growable\r
+++ /dev/null
-Growable byte arrays
+++ /dev/null
-collections
-! Copyright (C) 2003, 2008 Slava Pestov.
+! Copyright (C) 2003, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: init continuations hashtables io io.encodings.utf8
io.files io.pathnames kernel kernel.private namespaces parser
-sequences strings system splitting vocabs.loader ;
+sequences strings system splitting vocabs.loader alien.strings ;
IN: command-line
SYMBOL: script
SYMBOL: command-line
-: (command-line) ( -- args ) 10 getenv sift ;
+: (command-line) ( -- args ) 10 getenv sift [ alien>native-string ] map ;
: rc-path ( name -- path )
os windows? [ "." prepend ] unless
! Make sure error reporting works
-[ [ dup ] compile-call ] must-fail
-[ [ drop ] compile-call ] must-fail
+! [ [ dup ] compile-call ] must-fail
+! [ [ drop ] compile-call ] must-fail
! Regression
! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: slots arrays definitions generic hashtables summary io
-kernel math namespaces make prettyprint prettyprint.config
-sequences assocs sequences.private strings io.styles
-io.pathnames vectors words system splitting math.parser
-classes.mixin classes.tuple continuations continuations.private
-combinators generic.math classes.builtin classes compiler.units
-generic.standard generic.single vocabs init kernel.private io.encodings
-accessors math.order destructors source-files parser
-classes.tuple.parser effects.parser lexer
+USING: slots arrays definitions generic hashtables summary io kernel
+math namespaces make prettyprint prettyprint.config sequences assocs
+sequences.private strings io.styles io.pathnames vectors words system
+splitting math.parser classes.mixin classes.tuple continuations
+continuations.private combinators generic.math classes.builtin classes
+compiler.units generic.standard generic.single vocabs init
+kernel.private io.encodings accessors math.order destructors
+source-files parser classes.tuple.parser effects.parser lexer
generic.parser strings.parser vocabs.loader vocabs.parser see
source-files.errors ;
IN: debugger
GENERIC: error-help ( error -- topic )
M: object error. . ;
+
M: object error-help drop f ;
M: tuple error-help class ;
"Object did not survive image save/load: " write third . ;
: io-error. ( error -- )
- "I/O error: " write third print ;
+ "I/O error #" write third . ;
: type-check-error. ( obj -- )
"Type check error" print
"Cannot convert to C string: " write third . ;
: ffi-error. ( obj -- )
- "FFI: " write
- dup third [ write ": " write ] when*
- fourth print ;
+ "FFI error" print drop ;
: heap-scan-error. ( obj -- )
"Cannot do next-object outside begin/end-scan" print drop ;
! See http://factorcode.org/license.txt for BSD license.
USING: kernel strings values io.files assocs
splitting sequences io namespaces sets
-io.encodings.ascii io.encodings.utf8 ;
+io.encodings.ascii io.encodings.utf8 io.encodings.utf16 ;
IN: io.encodings.iana
<PRIVATE
] [ swap e>n-table get-global set-at ] 2bi ;
ascii "ANSI_X3.4-1968" register-encoding
+utf16be "UTF-16BE" register-encoding
+utf16le "UTF-16LE" register-encoding
+utf16 "UTF-16" register-encoding
\ No newline at end of file
+++ /dev/null
-Daniel Ehrenberg
+++ /dev/null
-UTF16 encoding/decoding
+++ /dev/null
-! Copyright (C) 2008 Daniel Ehrenberg
-! See http://factorcode.org/license.txt for BSD license.
-USING: help.markup help.syntax io.encodings strings ;
-IN: io.encodings.utf16
-
-ARTICLE: "io.encodings.utf16" "UTF-16 encoding"
-"The UTF-16 encoding is a variable-width encoding. Unicode code points are encoded as 2 or 4 byte sequences. There are three encoding descriptor classes for working with UTF-16, depending on endianness or the presence of a BOM:"
-{ $subsection utf16 }
-{ $subsection utf16le }
-{ $subsection utf16be } ;
-
-ABOUT: "io.encodings.utf16"
-
-HELP: utf16le
-{ $class-description "The encoding descriptor for UTF-16LE, that is, UTF-16 in little endian, without a byte order mark. Streams can be made which read or write wth this encoding." }
-{ $see-also "encodings-introduction" } ;
-
-HELP: utf16be
-{ $class-description "The encoding descriptor for UTF-16BE, that is, UTF-16 in big endian, without a byte order mark. Streams can be made which read or write wth this encoding." }
-{ $see-also "encodings-introduction" } ;
-
-HELP: utf16
-{ $class-description "The encoding descriptor for UTF-16, that is, UTF-16 with a byte order mark. This is the most useful for general input and output in UTF-16. Streams can be made which read or write wth this encoding." }
-{ $see-also "encodings-introduction" } ;
-
-{ utf16 utf16le utf16be } related-words
+++ /dev/null
-! Copyright (C) 2008 Daniel Ehrenberg.
-! See http://factorcode.org/license.txt for BSD license.
-USING: kernel tools.test io.encodings.utf16 arrays sbufs
-io.streams.byte-array sequences io.encodings io strings
-io.encodings.string alien.c-types alien.strings accessors classes ;
-IN: io.encodings.utf16.tests
-
-[ { CHAR: x } ] [ B{ 0 CHAR: x } utf16be decode >array ] unit-test
-[ { HEX: 1D11E } ] [ B{ HEX: D8 HEX: 34 HEX: DD HEX: 1E } utf16be decode >array ] unit-test
-[ { CHAR: replacement-character } ] [ B{ BIN: 11011111 CHAR: q } utf16be decode >array ] unit-test
-[ { CHAR: replacement-character } ] [ B{ BIN: 11011011 CHAR: x BIN: 11011011 CHAR: x } utf16be decode >array ] unit-test
-
-[ { 0 120 216 52 221 30 } ] [ { CHAR: x HEX: 1d11e } >string utf16be encode >array ] unit-test
-
-[ { CHAR: x } ] [ B{ CHAR: x 0 } utf16le decode >array ] unit-test
-[ { 119070 } ] [ B{ HEX: 34 HEX: D8 HEX: 1E HEX: DD } >string utf16le decode >array ] unit-test
-[ { CHAR: replacement-character } ] [ { 0 BIN: 11011111 } >string utf16le decode >array ] unit-test
-[ { CHAR: replacement-character } ] [ { 0 BIN: 11011011 0 0 } >string utf16le decode >array ] unit-test
-
-[ { 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } >string utf16le encode >array ] unit-test
-
-[ { CHAR: x } ] [ B{ HEX: ff HEX: fe CHAR: x 0 } utf16 decode >array ] unit-test
-[ { CHAR: x } ] [ B{ HEX: fe HEX: ff 0 CHAR: x } utf16 decode >array ] unit-test
-
-[ { HEX: ff HEX: fe 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } >string utf16 encode >array ] unit-test
+++ /dev/null
-! Copyright (C) 2006, 2009 Daniel Ehrenberg.
-! See http://factorcode.org/license.txt for BSD license.
-USING: math kernel sequences sbufs vectors namespaces io.binary
-io.encodings combinators splitting io byte-arrays io.encodings.iana ;
-IN: io.encodings.utf16
-
-SINGLETON: utf16be
-
-utf16be "UTF-16BE" register-encoding
-
-SINGLETON: utf16le
-
-utf16le "UTF-16LE" register-encoding
-
-SINGLETON: utf16
-
-utf16 "UTF-16" register-encoding
-
-ERROR: missing-bom ;
-
-<PRIVATE
-
-! UTF-16BE decoding
-
-: append-nums ( byte ch -- ch )
- over [ 8 shift bitor ] [ 2drop replacement-char ] if ;
-
-: double-be ( stream byte -- stream char )
- over stream-read1 swap append-nums ;
-
-: quad-be ( stream byte -- stream char )
- double-be over stream-read1 [
- dup -2 shift BIN: 110111 number= [
- [ 2 shift ] dip BIN: 11 bitand bitor
- over stream-read1 swap append-nums HEX: 10000 +
- ] [ 2drop dup stream-read1 drop replacement-char ] if
- ] when* ;
-
-: ignore ( stream -- stream char )
- dup stream-read1 drop replacement-char ;
-
-: begin-utf16be ( stream byte -- stream char )
- dup -3 shift BIN: 11011 number= [
- dup BIN: 00000100 bitand zero?
- [ BIN: 11 bitand quad-be ]
- [ drop ignore ] if
- ] [ double-be ] if ;
-
-M: utf16be decode-char
- drop dup stream-read1 dup [ begin-utf16be ] when nip ;
-
-! UTF-16LE decoding
-
-: quad-le ( stream ch -- stream char )
- over stream-read1 swap 10 shift bitor
- over stream-read1 dup -2 shift BIN: 110111 = [
- BIN: 11 bitand append-nums HEX: 10000 +
- ] [ 2drop replacement-char ] if ;
-
-: double-le ( stream byte1 byte2 -- stream char )
- dup -3 shift BIN: 11011 = [
- dup BIN: 100 bitand 0 number=
- [ BIN: 11 bitand 8 shift bitor quad-le ]
- [ 2drop replacement-char ] if
- ] [ append-nums ] if ;
-
-: begin-utf16le ( stream byte -- stream char )
- over stream-read1 [ double-le ] [ drop replacement-char ] if* ;
-
-M: utf16le decode-char
- drop dup stream-read1 dup [ begin-utf16le ] when nip ;
-
-! UTF-16LE/BE encoding
-
-: encode-first ( char -- byte1 byte2 )
- -10 shift
- dup -8 shift BIN: 11011000 bitor
- swap HEX: FF bitand ;
-
-: encode-second ( char -- byte3 byte4 )
- BIN: 1111111111 bitand
- dup -8 shift BIN: 11011100 bitor
- swap BIN: 11111111 bitand ;
-
-: stream-write2 ( stream char1 char2 -- )
- rot [ stream-write1 ] curry bi@ ;
-
-: char>utf16be ( stream char -- )
- dup HEX: FFFF > [
- HEX: 10000 -
- 2dup encode-first stream-write2
- encode-second stream-write2
- ] [ h>b/b swap stream-write2 ] if ;
-
-M: utf16be encode-char ( char stream encoding -- )
- drop swap char>utf16be ;
-
-: char>utf16le ( char stream -- )
- dup HEX: FFFF > [
- HEX: 10000 -
- 2dup encode-first swap stream-write2
- encode-second swap stream-write2
- ] [ h>b/b stream-write2 ] if ;
-
-M: utf16le encode-char ( char stream encoding -- )
- drop swap char>utf16le ;
-
-! UTF-16
-
-CONSTANT: bom-le B{ HEX: ff HEX: fe }
-
-CONSTANT: bom-be B{ HEX: fe HEX: ff }
-
-: bom>le/be ( bom -- le/be )
- dup bom-le sequence= [ drop utf16le ] [
- bom-be sequence= [ utf16be ] [ missing-bom ] if
- ] if ;
-
-M: utf16 <decoder> ( stream utf16 -- decoder )
- drop 2 over stream-read bom>le/be <decoder> ;
-
-M: utf16 <encoder> ( stream utf16 -- encoder )
- drop bom-le over stream-write utf16le <encoder> ;
-
-PRIVATE>
+++ /dev/null
-Daniel Ehrenberg
+++ /dev/null
-UTF16 encoding with native byte order
+++ /dev/null
-USING: help.markup help.syntax ;
-IN: io.encodings.utf16n
-
-HELP: utf16n
-{ $class-description "The encoding descriptor for UTF-16 without a byte order mark in native endian order. This is useful mostly for FFI calls which take input of strings of the type wchar_t*" }
-{ $see-also "encodings-introduction" } ;
+++ /dev/null
-USING: accessors alien.c-types kernel
-io.encodings.utf16 io.streams.byte-array tools.test ;
-IN: io.encodings.utf16n
-
-: correct-endian ( obj -- ? )
- code>> little-endian? [ utf16le = ] [ utf16be = ] if ;
-
-[ t ] [ B{ } utf16n <byte-reader> correct-endian ] unit-test
-[ t ] [ utf16n <byte-writer> correct-endian ] unit-test
+++ /dev/null
-! Copyright (C) 2008 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: alien.c-types io.encodings io.encodings.utf16 kernel ;
-IN: io.encodings.utf16n
-
-! Native-order UTF-16
-
-SINGLETON: utf16n
-
-: utf16n ( -- descriptor )
- little-endian? utf16le utf16be ? ; foldable
-
-M: utf16n <decoder> drop utf16n <decoder> ;
-
-M: utf16n <encoder> drop utf16n <encoder> ;
+++ /dev/null
-USING: help.syntax help.markup io byte-arrays quotations ;
-IN: io.streams.byte-array
-
-ABOUT: "io.streams.byte-array"
-
-ARTICLE: "io.streams.byte-array" "Byte-array streams"
-"Byte array streams:"
-{ $subsection <byte-reader> }
-{ $subsection <byte-writer> }
-"Utility combinators:"
-{ $subsection with-byte-reader }
-{ $subsection with-byte-writer } ;
-
-HELP: <byte-reader>
-{ $values { "byte-array" byte-array }
- { "encoding" "an encoding descriptor" }
- { "stream" "a new byte reader" } }
-{ $description "Creates an input stream reading from a byte array using an encoding." } ;
-
-HELP: <byte-writer>
-{ $values { "encoding" "an encoding descriptor" }
- { "stream" "a new byte writer" } }
-{ $description "Creates an output stream writing data to a byte array using an encoding." } ;
-
-HELP: with-byte-reader
-{ $values { "encoding" "an encoding descriptor" }
- { "quot" quotation } { "byte-array" byte-array } }
-{ $description "Calls the quotation in a new dynamic scope with " { $link input-stream } " rebound to an input stream for reading from a byte array using an encoding." } ;
-
-HELP: with-byte-writer
-{ $values { "encoding" "an encoding descriptor" }
- { "quot" quotation }
- { "byte-array" byte-array } }
-{ $description "Calls the quotation in a new dynamic scope with " { $link output-stream } " rebound to an output stream writing data to a byte array using an encoding." } ;
+++ /dev/null
-USING: tools.test io.streams.byte-array io.encodings.binary
-io.encodings.utf8 io kernel arrays strings namespaces ;
-
-[ B{ 1 2 3 } ] [ binary [ B{ 1 2 3 } write ] with-byte-writer ] unit-test
-[ B{ 1 2 3 } ] [ { 1 2 3 } binary [ 3 read ] with-byte-reader ] unit-test
-
-[ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
-[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } >string utf8 [ write ] with-byte-writer ] unit-test
-[ { BIN: 101111111000000111111 } t ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } utf8 <byte-reader> contents dup >array swap string? ] unit-test
-
-[ B{ 121 120 } 0 ] [
- B{ 0 121 120 0 0 0 0 0 0 } binary
- [ 1 read drop "\0" read-until ] with-byte-reader
-] unit-test
-
-[ 1 1 4 11 f ] [
- B{ 1 2 3 4 5 6 7 8 9 10 11 12 } binary
- [
- read1
- 0 seek-absolute input-stream get stream-seek
- read1
- 2 seek-relative input-stream get stream-seek
- read1
- -2 seek-end input-stream get stream-seek
- read1
- 0 seek-end input-stream get stream-seek
- read1
- ] with-byte-reader
-] unit-test
\ No newline at end of file
+++ /dev/null
-! Copyright (C) 2008, 2009 Daniel Ehrenberg
-! See http://factorcode.org/license.txt for BSD license.
-USING: byte-arrays byte-vectors kernel io.encodings io.streams.string
-sequences io namespaces io.encodings.private accessors sequences.private
-io.streams.sequence destructors math combinators ;
-IN: io.streams.byte-array
-
-M: byte-vector stream-element-type drop +byte+ ;
-
-: <byte-writer> ( encoding -- stream )
- 512 <byte-vector> swap <encoder> ;
-
-: with-byte-writer ( encoding quot -- byte-array )
- [ <byte-writer> ] dip [ output-stream get ] compose with-output-stream*
- dup encoder? [ stream>> ] when >byte-array ; inline
-
-TUPLE: byte-reader { underlying byte-array read-only } { i array-capacity } ;
-
-M: byte-reader stream-element-type drop +byte+ ;
-
-M: byte-reader stream-read-partial stream-read ;
-M: byte-reader stream-read sequence-read ;
-M: byte-reader stream-read1 sequence-read1 ;
-M: byte-reader stream-read-until sequence-read-until ;
-M: byte-reader dispose drop ;
-
-M: byte-reader stream-seek ( n seek-type stream -- )
- swap {
- { seek-absolute [ (>>i) ] }
- { seek-relative [ [ + ] change-i drop ] }
- { seek-end [ [ underlying>> length + ] keep (>>i) ] }
- [ bad-seek-type ]
- } case ;
-
-: <byte-reader> ( byte-array encoding -- stream )
- [ B{ } like 0 byte-reader boa ] dip <decoder> ;
-
-: with-byte-reader ( byte-array encoding quot -- )
- [ <byte-reader> ] dip with-input-stream* ; inline
+++ /dev/null
-Streams for reading and writing bytes in a byte array
+++ /dev/null
-! Copyright (C) 2008 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: kernel accessors alien alien.c-types alien.accessors math io ;
-IN: io.streams.memory
-
-TUPLE: memory-stream alien index ;
-
-: <memory-stream> ( alien -- stream )
- 0 memory-stream boa ;
-
-M: memory-stream stream-element-type drop +byte+ ;
-
-M: memory-stream stream-read1
- [ [ alien>> ] [ index>> ] bi alien-unsigned-1 ]
- [ [ 1+ ] change-index drop ] bi ;
-
-M: memory-stream stream-read
- [
- [ index>> ] [ alien>> ] bi <displaced-alien>
- swap memory>byte-array
- ] [ [ + ] change-index drop ] 2bi ;
+++ /dev/null
-Streams for reading data directly from memory
! Copyright (C) 2008 Peter Burns.
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel peg peg.ebnf math.parser math.private strings math
+USING: kernel peg peg.ebnf math.parser math.parser.private strings math
math.functions sequences arrays vectors hashtables assocs
prettyprint json ;
IN: json.reader
-! Copyright (C) 2003, 2008 Slava Pestov.
+! Copyright (C) 2003, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays byte-arrays generic hashtables io assocs
-kernel math namespaces make sequences strings sbufs vectors
+USING: accessors arrays byte-arrays byte-vectors generic hashtables io
+assocs kernel math namespaces make sequences strings sbufs vectors
words prettyprint.config prettyprint.custom prettyprint.sections
-quotations io io.pathnames io.styles math.parser effects
-classes.tuple math.order classes.tuple.private classes
-combinators colors ;
+quotations io io.pathnames io.styles math.parser effects classes.tuple
+math.order classes.tuple.private classes combinators colors ;
IN: prettyprint.backend
M: effect pprint* effect>string "(" ")" surround text ;
M: compose pprint-delims drop \ [ \ ] ;
M: array pprint-delims drop \ { \ } ;
M: byte-array pprint-delims drop \ B{ \ } ;
+M: byte-vector pprint-delims drop \ BV{ \ } ;
M: vector pprint-delims drop \ V{ \ } ;
M: hashtable pprint-delims drop \ H{ \ } ;
M: tuple pprint-delims drop \ T{ \ } ;
M: object >pprint-sequence ;
M: vector >pprint-sequence ;
+M: byte-vector >pprint-sequence ;
M: curry >pprint-sequence ;
M: compose >pprint-sequence ;
M: hashtable >pprint-sequence >alist ;
M: object pprint* pprint-object ;
M: vector pprint* pprint-object ;
+M: byte-vector pprint* pprint-object ;
M: hashtable pprint* pprint-object ;
M: curry pprint* pprint-object ;
M: compose pprint* pprint-object ;
! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: fry accessors alien alien.accessors arrays byte-arrays
-classes sequences.private continuations.private effects generic
-hashtables hashtables.private io io.backend io.files
-io.files.private io.streams.c kernel kernel.private math
-math.private memory namespaces namespaces.private parser
-quotations quotations.private sbufs sbufs.private
-sequences sequences.private slots.private strings
+USING: fry accessors alien alien.accessors arrays byte-arrays classes
+sequences.private continuations.private effects generic hashtables
+hashtables.private io io.backend io.files io.files.private
+io.streams.c kernel kernel.private math math.private
+math.parser.private memory memory.private namespaces
+namespaces.private parser quotations quotations.private sbufs
+sbufs.private sequences sequences.private slots.private strings
strings.private system threads.private classes.tuple
-classes.tuple.private vectors vectors.private words definitions
-assocs summary compiler.units system.private
-combinators combinators.short-circuit locals locals.backend locals.types
+classes.tuple.private vectors vectors.private words definitions assocs
+summary compiler.units system.private combinators
+combinators.short-circuit locals locals.backend locals.types
quotations.private combinators.private stack-checker.values
generic.single generic.single.private
alien.libraries
\ bignum>float { bignum } { float } define-primitive
\ bignum>float make-foldable
-\ string>float { string } { float } define-primitive
-\ string>float make-foldable
+\ (string>float) { byte-array } { float } define-primitive
+\ (string>float) make-foldable
-\ float>string { float } { string } define-primitive
-\ float>string make-foldable
+\ (float>string) { float } { byte-array } define-primitive
+\ (float>string) make-foldable
\ float>bits { real } { integer } define-primitive
\ float>bits make-foldable
\ gc-stats { } { array } define-primitive
-\ save-image { string } { } define-primitive
+\ (save-image) { byte-array } { } define-primitive
-\ save-image-and-exit { string } { } define-primitive
+\ (save-image-and-exit) { byte-array } { } define-primitive
\ data-room { } { integer integer array } define-primitive
\ data-room make-flushable
\ tag { object } { fixnum } define-primitive
\ tag make-foldable
-\ dlopen { string } { dll } define-primitive
+\ (dlopen) { byte-array } { dll } define-primitive
-\ dlsym { string object } { c-ptr } define-primitive
+\ (dlsym) { byte-array object } { c-ptr } define-primitive
\ dlclose { dll } { } define-primitive
\ die { } { } define-primitive
-\ fopen { string string } { alien } define-primitive
+\ (fopen) { byte-array byte-array } { alien } define-primitive
\ fgetc { alien } { object } define-primitive
--- /dev/null
+USING: help.markup help.syntax strings byte-arrays alien libc
+debugger io.encodings.string sequences ;
+IN: alien.strings
+
+HELP: string>alien
+{ $values { "string" string } { "encoding" "an encoding descriptor" } { "byte-array" byte-array } }
+{ $description "Encodes a string together with a trailing null code point using the given encoding, and stores the resulting bytes in a freshly-allocated byte array." }
+{ $errors "Throws an error if the string contains null characters, or characters not representable in the given encoding." } ;
+
+HELP: alien>string
+{ $values { "c-ptr" c-ptr } { "encoding" "an encoding descriptor" } { "string/f" "a string or " { $link f } } }
+{ $description "Reads a null-terminated C string from the specified address with the given encoding." } ;
+
+HELP: string>symbol
+{ $values { "str" string } { "alien" alien } }
+{ $description "Converts the string to a format which is a valid symbol name for the Factor VM's compiled code linker. By performing this conversion ahead of time, the image loader can run without allocating memory."
+$nl
+"On Windows CE, symbols are represented as UCS2 strings, and on all other platforms they are ASCII strings." } ;
+
+ABOUT: "c-strings"
--- /dev/null
+USING: alien.strings tools.test kernel libc
+io.encodings.8-bit io.encodings.utf8 io.encodings.utf16
+io.encodings.utf16n io.encodings.ascii alien io.encodings.string ;
+IN: alien.strings.tests
+
+[ "\u0000ff" ]
+[ "\u0000ff" latin1 string>alien latin1 alien>string ]
+unit-test
+
+[ "hello world" ]
+[ "hello world" latin1 string>alien latin1 alien>string ]
+unit-test
+
+[ "hello\u00abcdworld" ]
+[ "hello\u00abcdworld" utf16le string>alien utf16le alien>string ]
+unit-test
+
+[ t ] [ f expired? ] unit-test
+
+[ "hello world" ] [
+ "hello world" ascii malloc-string
+ dup ascii alien>string swap free
+] unit-test
+
+[ "hello world" ] [
+ "hello world" utf16n malloc-string
+ dup utf16n alien>string swap free
+] unit-test
+
+[ f ] [ f utf8 alien>string ] unit-test
+
+[ "hello" ] [ "hello" utf16 encode utf16 decode ] unit-test
+
+[ "hello" ] [ "hello" utf16 string>alien utf16 alien>string ] unit-test
--- /dev/null
+! Copyright (C) 2008, 2009 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: arrays sequences kernel kernel.private accessors math
+alien.accessors byte-arrays io io.encodings io.encodings.utf8
+io.encodings.utf16n io.streams.byte-array io.streams.memory system
+system.private alien strings combinators namespaces init ;
+IN: alien.strings
+
+GENERIC# alien>string 1 ( c-ptr encoding -- string/f )
+
+M: c-ptr alien>string
+ [ <memory-stream> ] [ <decoder> ] bi*
+ "\0" swap stream-read-until drop ;
+
+M: f alien>string
+ drop ;
+
+ERROR: invalid-c-string string ;
+
+: check-string ( string -- )
+ 0 over memq? [ invalid-c-string ] [ drop ] if ;
+
+GENERIC# string>alien 1 ( string encoding -- byte-array )
+
+M: c-ptr string>alien drop ;
+
+M: string string>alien
+ over check-string
+ <byte-writer>
+ [ stream-write ]
+ [ 0 swap stream-write1 ]
+ [ stream>> >byte-array ]
+ tri ;
+
+HOOK: alien>native-string os ( alien -- string )
+
+HOOK: native-string>alien os ( string -- alien )
+
+M: windows alien>native-string utf16n alien>string ;
+
+M: wince native-string>alien utf16n string>alien ;
+
+M: winnt native-string>alien utf8 string>alien ;
+
+M: unix alien>native-string utf8 alien>string ;
+
+M: unix native-string>alien utf8 string>alien ;
+
+: dll-path ( dll -- string )
+ path>> alien>native-string ;
+
+: string>symbol ( str -- alien )
+ dup string?
+ [ native-string>alien ]
+ [ [ native-string>alien ] map ] if ;
+
+[
+ 8 getenv utf8 alien>string string>cpu \ cpu set-global
+ 9 getenv utf8 alien>string string>os \ os set-global
+] "alien.strings" add-init-hook
+
--- /dev/null
+Passing Factor strings as C strings and vice versa
"kernel"
"kernel.private"
"math"
+ "math.parser.private"
"math.private"
"memory"
+ "memory.private"
"quotations"
"quotations.private"
"sbufs"
{ "float>bignum" "math.private" (( x -- y )) }
{ "fixnum>float" "math.private" (( x -- y )) }
{ "bignum>float" "math.private" (( x -- y )) }
- { "string>float" "math.private" (( str -- n/f )) }
- { "float>string" "math.private" (( n -- str )) }
+ { "(string>float)" "math.parser.private" (( str -- n/f )) }
+ { "(float>string)" "math.parser.private" (( n -- str )) }
{ "float>bits" "math" (( x -- n )) }
{ "double>bits" "math" (( x -- n )) }
{ "bits>float" "math" (( n -- x )) }
{ "(exists?)" "io.files.private" (( path -- ? )) }
{ "gc" "memory" (( -- )) }
{ "gc-stats" "memory" f }
- { "save-image" "memory" (( path -- )) }
- { "save-image-and-exit" "memory" (( path -- )) }
+ { "(save-image)" "memory.private" (( path -- )) }
+ { "(save-image-and-exit)" "memory.private" (( path -- )) }
{ "datastack" "kernel" (( -- ds )) }
{ "retainstack" "kernel" (( -- rs )) }
{ "callstack" "kernel" (( -- cs )) }
{ "code-room" "memory" (( -- code-free code-total )) }
{ "micros" "system" (( -- us )) }
{ "modify-code-heap" "compiler.units" (( alist -- )) }
- { "dlopen" "alien.libraries" (( path -- dll )) }
- { "dlsym" "alien.libraries" (( name dll -- alien )) }
+ { "(dlopen)" "alien.libraries" (( path -- dll )) }
+ { "(dlsym)" "alien.libraries" (( name dll -- alien )) }
{ "dlclose" "alien.libraries" (( dll -- )) }
{ "<byte-array>" "byte-arrays" (( n -- byte-array )) }
{ "(byte-array)" "byte-arrays" (( n -- byte-array )) }
{ "<displaced-alien>" "alien" (( displacement c-ptr -- alien )) }
- { "alien-signed-cell" "alien.accessors" f }
- { "set-alien-signed-cell" "alien.accessors" f }
- { "alien-unsigned-cell" "alien.accessors" f }
- { "set-alien-unsigned-cell" "alien.accessors" f }
- { "alien-signed-8" "alien.accessors" f }
- { "set-alien-signed-8" "alien.accessors" f }
- { "alien-unsigned-8" "alien.accessors" f }
- { "set-alien-unsigned-8" "alien.accessors" f }
- { "alien-signed-4" "alien.accessors" f }
- { "set-alien-signed-4" "alien.accessors" f }
- { "alien-unsigned-4" "alien.accessors" f }
- { "set-alien-unsigned-4" "alien.accessors" f }
- { "alien-signed-2" "alien.accessors" f }
- { "set-alien-signed-2" "alien.accessors" f }
- { "alien-unsigned-2" "alien.accessors" f }
- { "set-alien-unsigned-2" "alien.accessors" f }
- { "alien-signed-1" "alien.accessors" f }
- { "set-alien-signed-1" "alien.accessors" f }
- { "alien-unsigned-1" "alien.accessors" f }
- { "set-alien-unsigned-1" "alien.accessors" f }
- { "alien-float" "alien.accessors" f }
- { "set-alien-float" "alien.accessors" f }
- { "alien-double" "alien.accessors" f }
- { "set-alien-double" "alien.accessors" f }
- { "alien-cell" "alien.accessors" f }
- { "set-alien-cell" "alien.accessors" f }
+ { "alien-signed-cell" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-signed-cell" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-unsigned-cell" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-unsigned-cell" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-signed-8" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-signed-8" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-unsigned-8" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-unsigned-8" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-signed-4" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-signed-4" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-unsigned-4" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-unsigned-4" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-signed-2" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-signed-2" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-unsigned-2" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-unsigned-2" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-signed-1" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-signed-1" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-unsigned-1" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-unsigned-1" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-float" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-float" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-double" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-double" "alien.accessors" (( value c-ptr n -- )) }
+ { "alien-cell" "alien.accessors" (( c-ptr n -- value )) }
+ { "set-alien-cell" "alien.accessors" (( value c-ptr n -- )) }
{ "alien-address" "alien" (( c-ptr -- addr )) }
{ "set-slot" "slots.private" (( value obj n -- )) }
{ "string-nth" "strings.private" (( n string -- ch )) }
{ "end-scan" "memory" (( -- )) }
{ "size" "memory" (( obj -- n )) }
{ "die" "kernel" (( -- )) }
- { "fopen" "io.streams.c" (( path mode -- alien )) }
+ { "(fopen)" "io.streams.c" (( path mode -- alien )) }
{ "fgetc" "io.streams.c" (( alien -- ch/f )) }
{ "fread" "io.streams.c" (( n alien -- str/f )) }
{ "fputc" "io.streams.c" (( ch alien -- )) }
"<PRIVATE"
"BIN:"
"B{"
+ "BV{"
"C:"
"CHAR:"
"DEFER:"
--- /dev/null
+USING: arrays byte-arrays help.markup help.syntax kernel combinators ;\r
+IN: byte-vectors\r
+\r
+ARTICLE: "byte-vectors" "Byte vectors"\r
+"The " { $vocab-link "byte-vectors" } " vocabulary implements resizable mutable sequence of unsigned bytes. Byte vectors implement the " { $link "sequence-protocol" } " and thus all " { $link "sequences" } " can be used with them."\r
+$nl\r
+"Byte vectors form a class:"\r
+{ $subsection byte-vector }\r
+{ $subsection byte-vector? }\r
+"Creating byte vectors:"\r
+{ $subsection >byte-vector }\r
+{ $subsection <byte-vector> }\r
+"Literal syntax:"\r
+{ $subsection POSTPONE: BV{ }\r
+"If you don't care about initial capacity, a more elegant way to create a new byte vector is to write:"\r
+{ $code "BV{ } clone" } ;\r
+\r
+ABOUT: "byte-vectors"\r
+\r
+HELP: byte-vector\r
+{ $description "The class of resizable byte vectors. See " { $link "byte-vectors" } " for information." } ;\r
+\r
+HELP: <byte-vector>\r
+{ $values { "n" "a positive integer specifying initial capacity" } { "byte-vector" byte-vector } }\r
+{ $description "Creates a new byte vector that can hold " { $snippet "n" } " bytes before resizing." } ;\r
+\r
+HELP: >byte-vector\r
+{ $values { "seq" "a sequence" } { "byte-vector" byte-vector } }\r
+{ $description "Outputs a freshly-allocated byte vector with the same elements as a given sequence." }\r
+{ $errors "Throws an error if the sequence contains elements other than integers." } ;\r
+\r
+HELP: BV{\r
+{ $syntax "BV{ elements... }" }\r
+{ $values { "elements" "a list of bytes" } }\r
+{ $description "Marks the beginning of a literal byte vector. Literal byte vectors are terminated by " { $link POSTPONE: } } "." } \r
+{ $examples { $code "BV{ 1 2 3 12 }" } } ;\r
--- /dev/null
+IN: byte-vectors.tests\r
+USING: tools.test byte-vectors vectors sequences kernel\r
+prettyprint ;\r
+\r
+[ 0 ] [ 123 <byte-vector> length ] unit-test\r
+\r
+: do-it ( seq -- seq )\r
+ 123 [ over push ] each ;\r
+\r
+[ t ] [\r
+ 3 <byte-vector> do-it\r
+ 3 <vector> do-it sequence=\r
+] unit-test\r
+\r
+[ t ] [ BV{ } byte-vector? ] unit-test\r
+\r
+[ "BV{ }" ] [ BV{ } unparse ] unit-test\r
--- /dev/null
+! Copyright (C) 2008 Slava Pestov.\r
+! See http://factorcode.org/license.txt for BSD license.\r
+USING: arrays kernel kernel.private math sequences\r
+sequences.private growable byte-arrays accessors ;\r
+IN: byte-vectors\r
+\r
+TUPLE: byte-vector\r
+{ underlying byte-array }\r
+{ length array-capacity } ;\r
+\r
+: <byte-vector> ( n -- byte-vector )\r
+ (byte-array) 0 byte-vector boa ; inline\r
+\r
+: >byte-vector ( seq -- byte-vector )\r
+ T{ byte-vector f B{ } 0 } clone-like ;\r
+\r
+M: byte-vector like\r
+ drop dup byte-vector? [\r
+ dup byte-array?\r
+ [ dup length byte-vector boa ] [ >byte-vector ] if\r
+ ] unless ;\r
+\r
+M: byte-vector new-sequence\r
+ drop [ (byte-array) ] [ >fixnum ] bi byte-vector boa ;\r
+\r
+M: byte-vector equal?\r
+ over byte-vector? [ sequence= ] [ 2drop f ] if ;\r
+\r
+M: byte-array like\r
+ #! If we have an byte-array, we're done.\r
+ #! If we have a byte-vector, and it's at full capacity,\r
+ #! we're done. Otherwise, call resize-byte-array, which is a\r
+ #! relatively fast primitive.\r
+ drop dup byte-array? [\r
+ dup byte-vector? [\r
+ [ length ] [ underlying>> ] bi\r
+ 2dup length eq?\r
+ [ nip ] [ resize-byte-array ] if\r
+ ] [ >byte-array ] if\r
+ ] unless ;\r
+\r
+M: byte-array new-resizable drop <byte-vector> ;\r
+\r
+INSTANCE: byte-vector growable\r
--- /dev/null
+Growable byte arrays
--- /dev/null
+collections
\r
[ sa ] [ sa { sa sb sc } min-class ] unit-test\r
\r
+[ f ] [ sa sb classes-intersect? ] unit-test\r
+\r
[ +lt+ ] [ integer sequence class<=> ] unit-test\r
[ +lt+ ] [ sequence object class<=> ] unit-test\r
[ +gt+ ] [ object sequence class<=> ] unit-test\r
[ +eq+ ] [ integer integer class<=> ] unit-test\r
+\r
+! Limitations:\r
+\r
+! UNION: u1 sa sb ;\r
+! UNION: u2 sc ;\r
+\r
+! [ f ] [ u1 u2 classes-intersect? ] unit-test
\ No newline at end of file
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+UTF16 encoding/decoding
--- /dev/null
+! Copyright (C) 2008 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.markup help.syntax io.encodings strings ;
+IN: io.encodings.utf16
+
+ARTICLE: "io.encodings.utf16" "UTF-16 encoding"
+"The UTF-16 encoding is a variable-width encoding. Unicode code points are encoded as 2 or 4 byte sequences. There are three encoding descriptor classes for working with UTF-16, depending on endianness or the presence of a BOM:"
+{ $subsection utf16 }
+{ $subsection utf16le }
+{ $subsection utf16be } ;
+
+ABOUT: "io.encodings.utf16"
+
+HELP: utf16le
+{ $class-description "The encoding descriptor for UTF-16LE, that is, UTF-16 in little endian, without a byte order mark. Streams can be made which read or write wth this encoding." }
+{ $see-also "encodings-introduction" } ;
+
+HELP: utf16be
+{ $class-description "The encoding descriptor for UTF-16BE, that is, UTF-16 in big endian, without a byte order mark. Streams can be made which read or write wth this encoding." }
+{ $see-also "encodings-introduction" } ;
+
+HELP: utf16
+{ $class-description "The encoding descriptor for UTF-16, that is, UTF-16 with a byte order mark. This is the most useful for general input and output in UTF-16. Streams can be made which read or write wth this encoding." }
+{ $see-also "encodings-introduction" } ;
+
+{ utf16 utf16le utf16be } related-words
--- /dev/null
+! Copyright (C) 2008 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel tools.test io.encodings.utf16 arrays sbufs
+io.streams.byte-array sequences io.encodings io strings
+io.encodings.string alien.c-types alien.strings accessors classes ;
+IN: io.encodings.utf16.tests
+
+[ { CHAR: x } ] [ B{ 0 CHAR: x } utf16be decode >array ] unit-test
+[ { HEX: 1D11E } ] [ B{ HEX: D8 HEX: 34 HEX: DD HEX: 1E } utf16be decode >array ] unit-test
+[ { CHAR: replacement-character } ] [ B{ BIN: 11011111 CHAR: q } utf16be decode >array ] unit-test
+[ { CHAR: replacement-character } ] [ B{ BIN: 11011011 CHAR: x BIN: 11011011 CHAR: x } utf16be decode >array ] unit-test
+
+[ { 0 120 216 52 221 30 } ] [ { CHAR: x HEX: 1d11e } >string utf16be encode >array ] unit-test
+
+[ { CHAR: x } ] [ B{ CHAR: x 0 } utf16le decode >array ] unit-test
+[ { 119070 } ] [ B{ HEX: 34 HEX: D8 HEX: 1E HEX: DD } >string utf16le decode >array ] unit-test
+[ { CHAR: replacement-character } ] [ { 0 BIN: 11011111 } >string utf16le decode >array ] unit-test
+[ { CHAR: replacement-character } ] [ { 0 BIN: 11011011 0 0 } >string utf16le decode >array ] unit-test
+
+[ { 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } >string utf16le encode >array ] unit-test
+
+[ { CHAR: x } ] [ B{ HEX: ff HEX: fe CHAR: x 0 } utf16 decode >array ] unit-test
+[ { CHAR: x } ] [ B{ HEX: fe HEX: ff 0 CHAR: x } utf16 decode >array ] unit-test
+
+[ { HEX: ff HEX: fe 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } >string utf16 encode >array ] unit-test
--- /dev/null
+! Copyright (C) 2006, 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: math kernel sequences sbufs vectors namespaces io.binary
+io.encodings combinators splitting io byte-arrays ;
+IN: io.encodings.utf16
+
+SINGLETON: utf16be
+
+SINGLETON: utf16le
+
+SINGLETON: utf16
+
+ERROR: missing-bom ;
+
+<PRIVATE
+
+! UTF-16BE decoding
+
+: append-nums ( byte ch -- ch )
+ over [ 8 shift bitor ] [ 2drop replacement-char ] if ;
+
+: double-be ( stream byte -- stream char )
+ over stream-read1 swap append-nums ;
+
+: quad-be ( stream byte -- stream char )
+ double-be over stream-read1 [
+ dup -2 shift BIN: 110111 number= [
+ [ 2 shift ] dip BIN: 11 bitand bitor
+ over stream-read1 swap append-nums HEX: 10000 +
+ ] [ 2drop dup stream-read1 drop replacement-char ] if
+ ] when* ;
+
+: ignore ( stream -- stream char )
+ dup stream-read1 drop replacement-char ;
+
+: begin-utf16be ( stream byte -- stream char )
+ dup -3 shift BIN: 11011 number= [
+ dup BIN: 00000100 bitand zero?
+ [ BIN: 11 bitand quad-be ]
+ [ drop ignore ] if
+ ] [ double-be ] if ;
+
+M: utf16be decode-char
+ drop dup stream-read1 dup [ begin-utf16be ] when nip ;
+
+! UTF-16LE decoding
+
+: quad-le ( stream ch -- stream char )
+ over stream-read1 swap 10 shift bitor
+ over stream-read1 dup -2 shift BIN: 110111 = [
+ BIN: 11 bitand append-nums HEX: 10000 +
+ ] [ 2drop replacement-char ] if ;
+
+: double-le ( stream byte1 byte2 -- stream char )
+ dup -3 shift BIN: 11011 = [
+ dup BIN: 100 bitand 0 number=
+ [ BIN: 11 bitand 8 shift bitor quad-le ]
+ [ 2drop replacement-char ] if
+ ] [ append-nums ] if ;
+
+: begin-utf16le ( stream byte -- stream char )
+ over stream-read1 [ double-le ] [ drop replacement-char ] if* ;
+
+M: utf16le decode-char
+ drop dup stream-read1 dup [ begin-utf16le ] when nip ;
+
+! UTF-16LE/BE encoding
+
+: encode-first ( char -- byte1 byte2 )
+ -10 shift
+ dup -8 shift BIN: 11011000 bitor
+ swap HEX: FF bitand ;
+
+: encode-second ( char -- byte3 byte4 )
+ BIN: 1111111111 bitand
+ dup -8 shift BIN: 11011100 bitor
+ swap BIN: 11111111 bitand ;
+
+: stream-write2 ( stream char1 char2 -- )
+ rot [ stream-write1 ] curry bi@ ;
+
+: char>utf16be ( stream char -- )
+ dup HEX: FFFF > [
+ HEX: 10000 -
+ 2dup encode-first stream-write2
+ encode-second stream-write2
+ ] [ h>b/b swap stream-write2 ] if ;
+
+M: utf16be encode-char ( char stream encoding -- )
+ drop swap char>utf16be ;
+
+: char>utf16le ( char stream -- )
+ dup HEX: FFFF > [
+ HEX: 10000 -
+ 2dup encode-first swap stream-write2
+ encode-second swap stream-write2
+ ] [ h>b/b stream-write2 ] if ;
+
+M: utf16le encode-char ( char stream encoding -- )
+ drop swap char>utf16le ;
+
+! UTF-16
+
+CONSTANT: bom-le B{ HEX: ff HEX: fe }
+
+CONSTANT: bom-be B{ HEX: fe HEX: ff }
+
+: bom>le/be ( bom -- le/be )
+ dup bom-le sequence= [ drop utf16le ] [
+ bom-be sequence= [ utf16be ] [ missing-bom ] if
+ ] if ;
+
+M: utf16 <decoder> ( stream utf16 -- decoder )
+ drop 2 over stream-read bom>le/be <decoder> ;
+
+M: utf16 <encoder> ( stream utf16 -- encoder )
+ drop bom-le over stream-write utf16le <encoder> ;
+
+PRIVATE>
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+UTF16 encoding with native byte order
--- /dev/null
+USING: help.markup help.syntax ;
+IN: io.encodings.utf16n
+
+HELP: utf16n
+{ $class-description "The encoding descriptor for UTF-16 without a byte order mark in native endian order. This is useful mostly for FFI calls which take input of strings of the type wchar_t*" }
+{ $see-also "encodings-introduction" } ;
--- /dev/null
+USING: accessors alien.c-types kernel
+io.encodings.utf16 io.streams.byte-array tools.test ;
+IN: io.encodings.utf16n
+
+: correct-endian ( obj -- ? )
+ code>> little-endian? [ utf16le = ] [ utf16be = ] if ;
+
+[ t ] [ B{ } utf16n <byte-reader> correct-endian ] unit-test
+[ t ] [ utf16n <byte-writer> correct-endian ] unit-test
--- /dev/null
+! Copyright (C) 2008 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: io.encodings io.encodings.utf16 kernel alien.accessors ;
+IN: io.encodings.utf16n
+
+! Native-order UTF-16
+
+SINGLETON: utf16n
+
+: utf16n ( -- descriptor )
+ B{ 1 0 0 0 } 0 alien-unsigned-4 1 = utf16le utf16be ? ; foldable
+
+M: utf16n <decoder> drop utf16n <decoder> ;
+
+M: utf16n <encoder> drop utf16n <encoder> ;
-! Copyright (C) 2004, 2008 Slava Pestov, Daniel Ehrenberg.
+! Copyright (C) 2004, 2009 Slava Pestov, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel kernel.private sequences init namespaces system io
-io.backend io.pathnames io.encodings io.files.private ;
+io.backend io.pathnames io.encodings io.files.private
+alien.strings ;
IN: io.files
HOOK: (file-reader) io-backend ( path -- stream )
: with-file-appender ( path encoding quot -- )
[ <file-appender> ] dip with-output-stream ; inline
-: exists? ( path -- ? ) normalize-path (exists?) ;
+: exists? ( path -- ? )
+ normalize-path native-string>alien (exists?) ;
! Current directory
<PRIVATE
[
cwd current-directory set-global
- 13 getenv cwd prepend-path \ image set-global
- 14 getenv cwd prepend-path \ vm set-global
+ 13 getenv alien>native-string cwd prepend-path \ image set-global
+ 14 getenv alien>native-string cwd prepend-path \ vm set-global
image parent-directory "resource-path" set-global
] "io.files" add-init-hook
--- /dev/null
+USING: help.syntax help.markup io byte-arrays quotations ;
+IN: io.streams.byte-array
+
+ABOUT: "io.streams.byte-array"
+
+ARTICLE: "io.streams.byte-array" "Byte-array streams"
+"Byte array streams:"
+{ $subsection <byte-reader> }
+{ $subsection <byte-writer> }
+"Utility combinators:"
+{ $subsection with-byte-reader }
+{ $subsection with-byte-writer } ;
+
+HELP: <byte-reader>
+{ $values { "byte-array" byte-array }
+ { "encoding" "an encoding descriptor" }
+ { "stream" "a new byte reader" } }
+{ $description "Creates an input stream reading from a byte array using an encoding." } ;
+
+HELP: <byte-writer>
+{ $values { "encoding" "an encoding descriptor" }
+ { "stream" "a new byte writer" } }
+{ $description "Creates an output stream writing data to a byte array using an encoding." } ;
+
+HELP: with-byte-reader
+{ $values { "encoding" "an encoding descriptor" }
+ { "quot" quotation } { "byte-array" byte-array } }
+{ $description "Calls the quotation in a new dynamic scope with " { $link input-stream } " rebound to an input stream for reading from a byte array using an encoding." } ;
+
+HELP: with-byte-writer
+{ $values { "encoding" "an encoding descriptor" }
+ { "quot" quotation }
+ { "byte-array" byte-array } }
+{ $description "Calls the quotation in a new dynamic scope with " { $link output-stream } " rebound to an output stream writing data to a byte array using an encoding." } ;
--- /dev/null
+USING: tools.test io.streams.byte-array io.encodings.binary
+io.encodings.utf8 io kernel arrays strings namespaces ;
+
+[ B{ 1 2 3 } ] [ binary [ B{ 1 2 3 } write ] with-byte-writer ] unit-test
+[ B{ 1 2 3 } ] [ { 1 2 3 } binary [ 3 read ] with-byte-reader ] unit-test
+
+[ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
+[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } >string utf8 [ write ] with-byte-writer ] unit-test
+[ { BIN: 101111111000000111111 } t ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } utf8 <byte-reader> contents dup >array swap string? ] unit-test
+
+[ B{ 121 120 } 0 ] [
+ B{ 0 121 120 0 0 0 0 0 0 } binary
+ [ 1 read drop "\0" read-until ] with-byte-reader
+] unit-test
+
+[ 1 1 4 11 f ] [
+ B{ 1 2 3 4 5 6 7 8 9 10 11 12 } binary
+ [
+ read1
+ 0 seek-absolute input-stream get stream-seek
+ read1
+ 2 seek-relative input-stream get stream-seek
+ read1
+ -2 seek-end input-stream get stream-seek
+ read1
+ 0 seek-end input-stream get stream-seek
+ read1
+ ] with-byte-reader
+] unit-test
\ No newline at end of file
--- /dev/null
+! Copyright (C) 2008, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: byte-arrays byte-vectors kernel io.encodings sequences io
+namespaces io.encodings.private accessors sequences.private
+io.streams.sequence destructors math combinators ;
+IN: io.streams.byte-array
+
+M: byte-vector stream-element-type drop +byte+ ;
+
+: <byte-writer> ( encoding -- stream )
+ 512 <byte-vector> swap <encoder> ;
+
+: with-byte-writer ( encoding quot -- byte-array )
+ [ <byte-writer> ] dip [ output-stream get ] compose with-output-stream*
+ dup encoder? [ stream>> ] when >byte-array ; inline
+
+TUPLE: byte-reader { underlying byte-array read-only } { i array-capacity } ;
+
+M: byte-reader stream-element-type drop +byte+ ;
+
+M: byte-reader stream-read-partial stream-read ;
+M: byte-reader stream-read sequence-read ;
+M: byte-reader stream-read1 sequence-read1 ;
+M: byte-reader stream-read-until sequence-read-until ;
+M: byte-reader dispose drop ;
+
+M: byte-reader stream-seek ( n seek-type stream -- )
+ swap {
+ { seek-absolute [ (>>i) ] }
+ { seek-relative [ [ + ] change-i drop ] }
+ { seek-end [ [ underlying>> length + ] keep (>>i) ] }
+ [ bad-seek-type ]
+ } case ;
+
+: <byte-reader> ( byte-array encoding -- stream )
+ [ B{ } like 0 byte-reader boa ] dip <decoder> ;
+
+: with-byte-reader ( byte-array encoding quot -- )
+ [ <byte-reader> ] dip with-input-stream* ; inline
--- /dev/null
+Streams for reading and writing bytes in a byte array
! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel kernel.private namespaces make io io.encodings
-sequences math generic threads.private classes io.backend
-io.files continuations destructors byte-arrays accessors
-combinators ;
+USING: kernel kernel.private namespaces make io io.encodings sequences
+math generic threads.private classes io.backend io.files
+io.encodings.utf8 alien.strings continuations destructors byte-arrays
+accessors combinators ;
IN: io.streams.c
TUPLE: c-stream handle disposed ;
M: c-io-backend io-multiplex 60 60 * 1000 * 1000 * or (sleep) ;
+: fopen ( path mode -- handle )
+ [ utf8 string>alien ] bi@ (fopen) ;
+
M: c-io-backend (file-reader)
"rb" fopen <c-reader> ;
--- /dev/null
+! Copyright (C) 2008 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel accessors alien alien.accessors math io ;
+IN: io.streams.memory
+
+TUPLE: memory-stream alien index ;
+
+: <memory-stream> ( alien -- stream )
+ 0 memory-stream boa ;
+
+M: memory-stream stream-element-type drop +byte+ ;
+
+M: memory-stream stream-read1
+ [ [ alien>> ] [ index>> ] bi alien-unsigned-1 ]
+ [ [ 1+ ] change-index drop ] bi ;
--- /dev/null
+Streams for reading data directly from memory
-USING: help.markup help.syntax math math.private prettyprint
+USING: help.markup help.syntax math math.parser.private prettyprint
namespaces make strings ;
IN: math.parser
! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel math.private namespaces sequences sequences.private
-strings arrays combinators splitting math assocs make ;
+strings arrays combinators splitting math assocs byte-arrays make ;
IN: math.parser
: digit> ( ch -- n )
string>natural
] if ; inline
+: string>float ( str -- n/f )
+ >byte-array 0 suffix (string>float) ;
+
PRIVATE>
: base> ( str radix -- n/f )
[ ".0" append ]
} cond ;
+: float>string ( x -- str )
+ (float>string)
+ [ 0 = ] trim-tail >string
+ fix-float ;
+
M: float >base
drop {
{ [ dup fp-nan? ] [ drop "0/0." ] }
{ [ dup 1/0. = ] [ drop "1/0." ] }
{ [ dup -1/0. = ] [ drop "-1/0." ] }
{ [ dup double>bits HEX: 8000000000000000 = ] [ drop "-0.0" ] }
- [ float>string fix-float ]
+ [ float>string ]
} cond ;
: number>string ( n -- str ) 10 >base ;
-! Copyright (C) 2005, 2008 Slava Pestov.
+! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel continuations sequences vectors arrays system math ;
+USING: kernel continuations sequences vectors arrays system math
+io.backend alien.strings memory.private ;
IN: memory
: (each-object) ( quot: ( obj -- ) -- )
[ count-instances 100 + <vector> ] keep swap
[ [ push-if ] 2curry each-object ] keep >array ; inline
+: save-image ( path -- )
+ normalize-path native-string>alien (save-image) ;
+
+: save-image-and-exit ( path -- )
+ normalize-path native-string>alien (save-image) ;
+
: save ( -- ) image save-image ;
! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors alien arrays byte-arrays definitions generic
+USING: accessors alien arrays byte-arrays byte-vectors definitions generic
hashtables kernel math namespaces parser lexer sequences strings
strings.parser sbufs vectors words words.symbol words.constant
words.alias quotations io assocs splitting classes.tuple
"{" [ \ } [ >array ] parse-literal ] define-core-syntax
"V{" [ \ } [ >vector ] parse-literal ] define-core-syntax
"B{" [ \ } [ >byte-array ] parse-literal ] define-core-syntax
+ "BV{" [ \ } [ >byte-vector ] parse-literal ] define-core-syntax
"H{" [ \ } [ >hashtable ] parse-literal ] define-core-syntax
"T{" [ parse-tuple-literal parsed ] define-core-syntax
"W{" [ \ } [ first <wrapper> ] parse-literal ] define-core-syntax
-! Copyright (C) 2007 Slava Pestov.
+! Copyright (C) 2007, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-IN: system
USING: kernel kernel.private sequences math namespaces
init splitting assocs system.private layouts words ;
+IN: system
-SINGLETON: x86.32
-SINGLETON: x86.64
-SINGLETON: arm
-SINGLETON: ppc
+SINGLETONS: x86.32 x86.64 arm ppc ;
UNION: x86 x86.32 x86.64 ;
: cpu ( -- class ) \ cpu get-global ; foldable
-SINGLETON: winnt
-SINGLETON: wince
+SINGLETONS: winnt wince ;
UNION: windows winnt wince ;
-SINGLETON: freebsd
-SINGLETON: netbsd
-SINGLETON: openbsd
-SINGLETON: solaris
-SINGLETON: macosx
-SINGLETON: linux
+SINGLETONS: freebsd netbsd openbsd solaris macosx linux ;
SINGLETON: haiku
: vm ( -- path ) \ vm get-global ;
-[
- 8 getenv string>cpu \ cpu set-global
- 9 getenv string>os \ os set-global
-] "system" add-init-hook
-
: embedded? ( -- ? ) 15 getenv ;
: millis ( -- ms ) micros 1000 /i ;
/* open a native library and push a handle */
void primitive_dlopen(void)
{
- gc_root<F_BYTE_ARRAY> path(tag_object(string_to_native_alien(untag_string(dpop()))));
+ gc_root<F_BYTE_ARRAY> path(dpop());
+ path.untag_check();
gc_root<F_DLL> dll(allot<F_DLL>(sizeof(F_DLL)));
dll->path = path.value();
ffi_dlopen(dll.untagged());
void primitive_dlsym(void)
{
gc_root<F_OBJECT> dll(dpop());
- F_SYMBOL *sym = unbox_symbol_string();
+ gc_root<F_BYTE_ARRAY> name(dpop());
+ dll.untag_check();
+ name.untag_check();
+
+ F_CHAR *sym = (F_CHAR *)(name.untagged() + 1);
if(dll.value() == F)
box_alien(ffi_dlsym(NULL,sym));
}
}
-#ifdef FACTOR_DEBUG
- print_obj(symbol); nl(); fflush(stdout);
-#endif
-
return (void *)undefined_symbol;
}
init_profiler();
- userenv[CPU_ENV] = tag_object(from_char_string(FACTOR_CPU_STRING));
- userenv[OS_ENV] = tag_object(from_char_string(FACTOR_OS_STRING));
+ userenv[CPU_ENV] = allot_alien(F,(CELL)FACTOR_CPU_STRING);
+ userenv[OS_ENV] = allot_alien(F,(CELL)FACTOR_OS_STRING);
userenv[CELL_SIZE_ENV] = tag_fixnum(sizeof(CELL));
- userenv[EXECUTABLE_ENV] = (p->executable_path ? tag_object(from_native_string(p->executable_path)) : F);
+ userenv[EXECUTABLE_ENV] = allot_alien(F,(CELL)p->executable_path);
userenv[ARGS_ENV] = F;
userenv[EMBEDDED_ENV] = F;
int i;
for(i = 1; i < argc; i++)
- args.add(tag_object(from_native_string(argv[i])));
+ args.add(allot_alien(F,(CELL)argv[i]));
args.trim();
userenv[ARGS_ENV] = args.array.value();
/* do a full GC to push everything into tenured space */
gc();
- save_image(unbox_native_string());
+ gc_root<F_BYTE_ARRAY> path(dpop());
+ path.untag_check();
+ save_image((F_CHAR *)(path.untagged() + 1));
}
void primitive_save_image_and_exit(void)
-{
+{
/* We unbox this before doing anything else. This is the only point
where we might throw an error, so we have to throw an error here since
later steps destroy the current image. */
- F_CHAR *path = unbox_native_string();
-
- REGISTER_C_STRING(path);
+ gc_root<F_BYTE_ARRAY> path(dpop());
+ path.untag_check();
/* strip out userenv data which is set on startup anyway */
CELL i;
compact_code_heap();
performing_compaction = false;
- UNREGISTER_C_STRING(F_CHAR,path);
-
/* Save the image */
- if(save_image(path))
+ if(save_image((F_CHAR *)(path.untagged() + 1)))
exit(0);
else
exit(1);
relocate_code();
/* Store image path name */
- userenv[IMAGE_ENV] = tag_object(from_native_string(p->image_path));
+ userenv[IMAGE_ENV] = allot_alien(F,(CELL)p->image_path);
}
return;
#endif
- CELL error = tag_object(from_char_string(strerror(errno)));
- general_error(ERROR_IO,error,F,NULL);
+ general_error(ERROR_IO,tag_fixnum(errno),F,NULL);
}
void primitive_fopen(void)
{
- char *mode = unbox_char_string();
- REGISTER_C_STRING(mode);
- char *path = unbox_char_string();
- UNREGISTER_C_STRING(char,mode);
+ gc_root<F_BYTE_ARRAY> mode(dpop());
+ gc_root<F_BYTE_ARRAY> path(dpop());
+ mode.untag_check();
+ path.untag_check();
for(;;)
{
- FILE *file = fopen(path,mode);
+ FILE *file = fopen((char *)(path.untagged() + 1),
+ (char *)(mode.untagged() + 1));
if(file == NULL)
io_error();
else
/* If a runtime function needs to call another function which potentially
-allocates memory, it must store any local variable references to Factor
-objects on the root stack */
+allocates memory, it must wrap any local variable references to Factor
+objects in gc_root instances */
extern F_SEGMENT *gc_locals_region;
extern CELL gc_locals;
DEFPUSHPOP(root_,extra_roots)
-/* We ignore strings which point outside the data heap, but we might be given
-a char* which points inside the data heap, in which case it is a root, for
-example if we call unbox_char_string() the result is placed in a byte array */
-INLINE bool root_push_alien(const void *ptr)
-{
- if(in_data_heap_p((CELL)ptr))
- {
- F_BYTE_ARRAY *objptr = ((F_BYTE_ARRAY *)ptr) - 1;
- if(objptr->header == tag_header(BYTE_ARRAY_TYPE))
- {
- root_push(tag_object(objptr));
- return true;
- }
- }
-
- return false;
-}
-
-#define REGISTER_C_STRING(obj) \
- bool obj##_root = root_push_alien((const char *)obj)
-#define UNREGISTER_C_STRING(type,obj) \
- if(obj##_root) obj = (type *)alien_offset(root_pop())
-
#define REGISTER_BIGNUM(obj) if(obj) root_push(tag_bignum(obj))
#define UNREGISTER_BIGNUM(obj) if(obj) obj = (untag_bignum_fast(root_pop()))
void primitive_str_to_float(void)
{
- char *c_str, *end;
- double f;
- F_STRING *str = untag_string(dpeek());
- CELL capacity = string_capacity(str);
-
- c_str = to_char_string(str,false);
- end = c_str;
- f = strtod(c_str,&end);
- if(end != c_str + capacity)
- drepl(F);
- else
+ F_BYTE_ARRAY *bytes = untag_byte_array(dpeek());
+ CELL capacity = array_capacity(bytes);
+
+ char *c_str = (char *)(bytes + 1);
+ char *end = c_str;
+ double f = strtod(c_str,&end);
+ if(end == c_str + capacity - 1)
drepl(allot_float(f));
+ else
+ drepl(F);
}
void primitive_float_to_str(void)
{
- char tmp[33];
- snprintf(tmp,32,"%.16g",untag_float(dpop()));
- tmp[32] = '\0';
- box_char_string(tmp);
+ F_BYTE_ARRAY *array = allot_byte_array(33);
+ snprintf((char *)(array + 1),32,"%.16g",untag_float(dpop()));
+ dpush(tag_object(array));
}
#define POP_FLOATS(x,y) \
void ffi_dlclose(F_DLL *dll)
{
if(dlclose(dll->dll))
- {
- general_error(ERROR_FFI,tag_object(
- from_char_string(dlerror())),F,NULL);
- }
+ general_error(ERROR_FFI,F,F,NULL);
dll->dll = NULL;
}
void primitive_existsp(void)
{
struct stat sb;
- box_boolean(stat(unbox_char_string(),&sb) >= 0);
+ char *path = (char *)(untag_byte_array(dpop()) + 1);
+ box_boolean(stat(path,&sb) >= 0);
}
F_SEGMENT *alloc_segment(CELL size)
typedef char F_CHAR;
typedef char F_SYMBOL;
-#define from_native_string from_char_string
-#define unbox_native_string unbox_char_string
#define string_to_native_alien(string) string_to_char_alien(string,true)
-#define unbox_symbol_string unbox_char_string
#define STRING_LITERAL(string) string
typedef wchar_t F_SYMBOL;
-#define unbox_symbol_string unbox_u16_string
-#define from_symbol_string from_u16_string
-
#define FACTOR_OS_STRING "wince"
#define FACTOR_DLL L"factor-ce.dll"
#define FACTOR_DLL_NAME "factor-ce.dll"
typedef char F_SYMBOL;
-#define unbox_symbol_string unbox_char_string
-#define from_symbol_string from_char_string
-
#define FACTOR_OS_STRING "winnt"
#define FACTOR_DLL L"factor.dll"
#define FACTOR_DLL_NAME "factor.dll"
void primitive_existsp(void)
{
- F_CHAR *path = unbox_u16_string();
+ F_CHAR *path = (F_CHAR *)(untag_byte_array(dpop()) + 1);
box_boolean(windows_stat(path));
}
typedef wchar_t F_CHAR;
-#define from_native_string from_u16_string
-#define unbox_native_string unbox_u16_string
#define string_to_native_alien(string) string_to_u16_alien(string,true)
#define STRING_LITERAL(string) L##string
dpush(tag_object(reallot_string(string,capacity)));
}
-/* Some ugly macros to prevent a 2x code duplication */
-
-#define MEMORY_TO_STRING(type,utype) \
- F_STRING *memory_to_##type##_string(const type *string, CELL length) \
- { \
- REGISTER_C_STRING(string); \
- gc_root<F_STRING> s(allot_string_internal(length)); \
- UNREGISTER_C_STRING(type,string); \
- CELL i; \
- for(i = 0; i < length; i++) \
- { \
- set_string_nth(s.untagged(),i,(utype)*string); \
- string++; \
- } \
- return s.untagged(); \
- } \
- F_STRING *from_##type##_string(const type *str) \
- { \
- CELL length = 0; \
- const type *scan = str; \
- while(*scan++) length++; \
- return memory_to_##type##_string(str,length); \
- } \
- void box_##type##_string(const type *str) \
- { \
- dpush(str ? tag_object(from_##type##_string(str)) : F); \
- }
-
-MEMORY_TO_STRING(char,u8)
-MEMORY_TO_STRING(u16,u16)
-MEMORY_TO_STRING(u32,u32)
-
-bool check_string(F_STRING *s, CELL max)
-{
- CELL capacity = string_capacity(s);
- CELL i;
- for(i = 0; i < capacity; i++)
- {
- CELL ch = string_nth(s,i);
- if(ch == 0 || ch >= ((CELL)1 << (max * 8)))
- return false;
- }
- return true;
-}
-
-F_BYTE_ARRAY *allot_c_string(CELL capacity, CELL size)
-{
- return allot_byte_array((capacity + 1) * size);
-}
-
-#define STRING_TO_MEMORY(type) \
- void type##_string_to_memory(F_STRING *s, type *string) \
- { \
- CELL i; \
- CELL capacity = string_capacity(s); \
- for(i = 0; i < capacity; i++) \
- string[i] = string_nth(s,i); \
- } \
- void primitive_##type##_string_to_memory(void) \
- { \
- type *address = (type *)unbox_alien(); \
- F_STRING *str = untag_string(dpop()); \
- type##_string_to_memory(str,address); \
- } \
- F_BYTE_ARRAY *string_to_##type##_alien(F_STRING *s_, bool check) \
- { \
- gc_root<F_STRING> s(s_); \
- CELL capacity = string_capacity(s.untagged()); \
- F_BYTE_ARRAY *_c_str; \
- if(check && !check_string(s.untagged(),sizeof(type))) \
- general_error(ERROR_C_STRING,s.value(),F,NULL); \
- _c_str = allot_c_string(capacity,sizeof(type)); \
- type *c_str = (type*)(_c_str + 1); \
- type##_string_to_memory(s.untagged(),c_str); \
- c_str[capacity] = 0; \
- return _c_str; \
- } \
- type *to_##type##_string(F_STRING *s, bool check) \
- { \
- return (type*)(string_to_##type##_alien(s,check) + 1); \
- } \
- type *unbox_##type##_string(void) \
- { \
- return to_##type##_string(untag_string(dpop()),true); \
- }
-
-STRING_TO_MEMORY(char);
-STRING_TO_MEMORY(u16);
-
void primitive_string_nth(void)
{
F_STRING *string = untag_string_fast(dpop());
F_STRING *reallot_string(F_STRING *string, CELL capacity);
void primitive_resize_string(void);
-F_STRING *memory_to_char_string(const char *string, CELL length);
-F_STRING *from_char_string(const char *c_string);
-DLLEXPORT void box_char_string(const char *c_string);
-
-F_STRING *memory_to_u16_string(const u16 *string, CELL length);
-F_STRING *from_u16_string(const u16 *c_string);
-DLLEXPORT void box_u16_string(const u16 *c_string);
-
-void char_string_to_memory(F_STRING *s, char *string);
-F_BYTE_ARRAY *string_to_char_alien(F_STRING *s, bool check);
-char* to_char_string(F_STRING *s, bool check);
-DLLEXPORT char *unbox_char_string(void);
-
-void u16_string_to_memory(F_STRING *s, u16 *string);
-F_BYTE_ARRAY *string_to_u16_alien(F_STRING *s, bool check);
-u16* to_u16_string(F_STRING *s, bool check);
-DLLEXPORT u16 *unbox_u16_string(void);
-
/* String getters and setters */
CELL string_nth(F_STRING* string, CELL index);
void set_string_nth(F_STRING* string, CELL index, CELL value);