1 ! Copyright (C) 2018 John Benediktsson
2 ! See http://factorcode.org/license.txt for BSD license
4 USING: accessors alien.c-types alien.endian assocs calendar
5 calendar.parser classes.struct combinators endian io
6 io.encodings.8-bit io.encodings.ascii io.encodings.binary
7 io.encodings.string io.files io.streams.byte-array kernel math
8 math.parser namespaces sequences ;
14 CONSTANT: dbf-encodings H{
75 { 0x96 "mac-cyrillic" }
85 CONSTANT: dbf-file-types H{
87 { 0x03 "FoxBASE+/dBase III plus, no memo" }
88 { 0x04 "dBase IV, no memo" }
89 { 0x05 "dBase V, no memo" }
90 { 0x07 "Visual Objects 1.x" }
91 { 0x30 "Visual FoxPro" }
92 { 0x31 "Visual FoxPro, autoincrement enabled" }
93 { 0x32 "Visual FoxPro with field type Varchar or Varbinary" }
94 { 0x43 "dBase IV SQL table files, no memo" }
95 { 0x63 "dBase IV SQL system files, no memo" }
96 { 0x7b "dBase IV, with memo" }
97 { 0x83 "FoxBASE+/dBase III PLUS, with memo" }
98 { 0x87 "Visual Objects 1.x, with memo" }
99 { 0x8B "dBase IV with memo" }
100 { 0x8E "dBase IV with SQL table" }
101 { 0xCB "dBase IV SQL table files, with memo" }
102 { 0xE5 "HiPer-Six format with SMT memo file" }
103 { 0xF5 "FoxPro 2.x (or earlier) with memo" }
107 STRUCT: dbf-file-header
108 { file-type uint8_t }
112 { #records uint32_t }
113 { header-length uint16_t }
114 { record-length uint16_t }
115 { reserved1 uint16_t }
116 { incomplete-transaction uint8_t }
117 { encryption-flag uint8_t }
118 { free-record-thread uint32_t }
119 { reserved2 uint32_t }
120 { reserved3 uint32_t }
122 { language-driver uint8_t }
123 { reserved4 uint16_t } ;
125 : read-file-header ( -- file-header )
126 dbf-file-header read-struct ;
128 CONSTANT: dbf-field-flags H{
129 { 0x01 "System Column (not visible to user)" }
130 { 0x02 "Column can store null values" }
131 { 0x04 "Binary column (for CHAR and MEMO only)" }
132 { 0x06 "(0x02+0x04) When a field is NULL and binary (Integer, Currency, and Character/Memo fields)" }
133 { 0x0C "Column is autoincrementing" }
136 STRUCT: dbf-field-header
141 { #decimals uint8_t }
142 { reserved1 uint16_t }
143 { workarea-id uint8_t }
144 { reserved2 uint8_t }
145 { reserved3 uint8_t }
146 { set-fields-flag uint8_t }
147 { reserved4 uint8_t[7] }
148 { index-field-flag uint8_t } ;
150 : read-field-headers ( -- field-headers )
151 [ read1 dup { CHAR: \r CHAR: \n f } member? not ] [
152 dbf-field-header heap-size 1 - read swap prefix
153 dbf-field-header memory>struct
156 : check-field-header ( field-header -- field-header )
158 { CHAR: I [ dup length>> 4 assert= ] }
159 { CHAR: L [ dup length>> 1 assert= ] }
160 { CHAR: O [ dup length>> 8 assert= ] }
161 { CHAR: Y [ dup length>> 8 assert= ] }
162 { CHAR: D [ dup length>> 8 assert= ] }
163 { CHAR: T [ dup length>> 8 assert= ] }
164 { CHAR: M [ dup length>> 10 assert= ] }
168 : check-record-length ( file-header field-headers -- )
169 [ record-length>> ] [ [ length>> ] map-sum ] bi* assert= ;
173 TUPLE: record deleted? values ;
175 : read-records ( field-headers -- records )
176 [ read1 dup { 0x1a f } member? not ]
180 [ type>> parse-field ] bi
184 TUPLE: dbf file-header field-headers records ;
186 : load-dbf ( path -- dbf )
190 over header-length>> seek-absolute seek-input
191 over language-driver>> dbf-encodings at dbf-encoding [
192 dup read-records dbf boa
196 : seek-record ( n file-header -- )
197 [ record-length>> * ] [ header-length>> ] bi +
198 seek-absolute seek-input ;
200 : parse-string ( byte-array -- string )
201 [ " \0" member? ] trim-tail dbf-encoding get decode ;
203 : parse-date ( byte-array -- date/f )
204 dup [ " \0" member? ] all? [ drop f ] [
205 binary [ read-ymd <date-gmt> ] with-byte-reader
208 : parse-float ( byte-array -- n )
209 [ "\r\n\t *" member? ] trim string>number ;
211 : parse-int ( byte-array -- n )
212 dup length 4 assert= le> ;
214 : parse-short ( byte-array -- n )
215 dup length 2 assert= le> ;
219 ERROR: illegal-logical value ;
221 : parse-logical ( byte-array -- n )
223 { [ dup "TtYy" member? ] [ drop t ] }
224 { [ dup "FfNn" member? ] [ drop f ] }
225 { [ dup "? " member? ] [ drop unknown ] }
229 : parse-numeric ( byte-array -- n )
230 [ "\r\n\t *" member? ] trim
231 H{ { CHAR: , CHAR: . } } substitute string>number ;
233 : parse-double ( byte-array -- n )
234 dup length 8 assert= le> bits>double ;
236 : parse-currency ( byte-array -- n )
237 dup length 8 assert= le> 10000 / ;
239 : parse-timestamp ( byte-array -- timestamp )
240 [ -4713 1 1 <date> ] dip 4 cut [ le> ] bi@
241 [ days time+ ] [ milliseconds time+ ] bi* ;
243 ERROR: unsupported-field-type type ;
245 : parse-field ( byte-array type -- data )
248 { CHAR: 2 [ parse-short ] }
249 { CHAR: 4 [ parse-int ] }
250 { CHAR: 8 [ parse-double ] }
251 { CHAR: C [ parse-string ] }
252 { CHAR: D [ parse-date ] }
253 { CHAR: F [ parse-float ] }
254 { CHAR: I [ parse-int ] }
255 { CHAR: L [ parse-logical ] }
256 { CHAR: N [ parse-numeric ] }
257 { CHAR: O [ parse-double ] }
258 { CHAR: V [ parse-string ] }
259 { CHAR: Y [ parse-currency ] }
260 { CHAR: @ [ parse-timestamp ] }
261 ! { CHAR: + [ parse-autoincrement ] }
262 ! { CHAR: M [ parse-memo ] }
263 ! { CHAR: T [ parse-datetime ] }
264 ! { CHAR: B [ parse-double? ] } ! (only on dbversion in [0x30, 0x31, 0x32])
265 ! { CHAR: G [ parse-general ] }
266 ! { CHAR: P [ parse-picture ] }
267 ! { CHAR: Q [ parse-varbinary ] }
268 [ unsupported-field-type ]
271 : dbase3-memo ( n path -- data )
273 512 * seek-absolute seek-input
276 dup [ B{ 0 0x1a } member? ] find drop
277 [ head f ] [ t ] if* [ append ] dip
281 LE-STRUCT: db4-memo-header
282 { reserved uint } ! B{ 0xff 0xff 0x08 0x08 }
285 : dbase4-memo ( n path -- data )
287 512 * seek-absolute seek-input
288 db4-memo-header read-struct length>> read
291 BE-STRUCT: vfp-file-header
295 { reserved2 uchar[504] } ;
297 BE-STRUCT: vfp-memo-header
301 CONSTANT: vfp-memo-types H{
302 { 0x0 "picture memo" }
304 { 0x2 "object memo" }
307 : vfp-memo ( n path -- data )
309 vfp-file-header read-struct blocksize>> *
310 seek-absolute seek-input
311 vfp-memo-header read-struct length>> read