1 ! Copyright (C) 2018 John Benediktsson
2 ! See http://factorcode.org/license.txt for BSD license
4 USING: accessors alien.c-types alien.endian alien.strings assocs
5 calendar calendar.parser classes.struct combinators fry io
6 io.binary io.encodings.8-bit io.encodings.ascii
7 io.encodings.binary io.encodings.string io.files
8 io.streams.byte-array kernel math math.parser namespaces
9 sequences splitting strings ;
15 CONSTANT: dbf-encodings H{
76 { 0x96 "mac-cyrillic" }
86 CONSTANT: dbf-file-types H{
88 { 0x03 "FoxBASE+/dBase III plus, no memo" }
89 { 0x04 "dBase IV, no memo" }
90 { 0x05 "dBase V, no memo" }
91 { 0x07 "Visual Objects 1.x" }
92 { 0x30 "Visual FoxPro" }
93 { 0x31 "Visual FoxPro, autoincrement enabled" }
94 { 0x32 "Visual FoxPro with field type Varchar or Varbinary" }
95 { 0x43 "dBase IV SQL table files, no memo" }
96 { 0x63 "dBase IV SQL system files, no memo" }
97 { 0x7b "dBase IV, with memo" }
98 { 0x83 "FoxBASE+/dBase III PLUS, with memo" }
99 { 0x87 "Visual Objects 1.x, with memo" }
100 { 0x8B "dBase IV with memo" }
101 { 0x8E "dBase IV with SQL table" }
102 { 0xCB "dBase IV SQL table files, with memo" }
103 { 0xE5 "HiPer-Six format with SMT memo file" }
104 { 0xF5 "FoxPro 2.x (or earlier) with memo" }
108 STRUCT: dbf-file-header
109 { file-type uint8_t }
113 { #records uint32_t }
114 { header-length uint16_t }
115 { record-length uint16_t }
116 { reserved1 uint16_t }
117 { incomplete-transaction uint8_t }
118 { encryption-flag uint8_t }
119 { free-record-thread uint32_t }
120 { reserved2 uint32_t }
121 { reserved3 uint32_t }
123 { language-driver uint8_t }
124 { reserved4 uint16_t } ;
126 : read-file-header ( -- file-header )
127 dbf-file-header read-struct ;
129 CONSTANT: dbf-field-flags H{
130 { 0x01 "System Column (not visible to user)" }
131 { 0x02 "Column can store null values" }
132 { 0x04 "Binary column (for CHAR and MEMO only)" }
133 { 0x06 "(0x02+0x04) When a field is NULL and binary (Integer, Currency, and Character/Memo fields)" }
134 { 0x0C "Column is autoincrementing" }
137 STRUCT: dbf-field-header
142 { #decimals uint8_t }
143 { reserved1 uint16_t }
144 { workarea-id uint8_t }
145 { reserved2 uint8_t }
146 { reserved3 uint8_t }
147 { set-fields-flag uint8_t }
148 { reserved4 uint8_t[7] }
149 { index-field-flag uint8_t } ;
151 : read-field-headers ( -- field-headers )
152 [ read1 dup { CHAR: \r CHAR: \n f } member? not ] [
153 dbf-field-header heap-size 1 - read swap prefix
154 dbf-field-header memory>struct
157 : check-field-header ( field-header -- field-header )
159 { CHAR: I [ dup length>> 4 assert= ] }
160 { CHAR: L [ dup length>> 1 assert= ] }
161 { CHAR: O [ dup length>> 8 assert= ] }
162 { CHAR: Y [ dup length>> 8 assert= ] }
163 { CHAR: D [ dup length>> 8 assert= ] }
164 { CHAR: T [ dup length>> 8 assert= ] }
165 { CHAR: M [ dup length>> 10 assert= ] }
169 : check-record-length ( file-header field-headers -- )
170 [ record-length>> ] [ [ length>> ] map-sum ] bi* assert= ;
174 TUPLE: record deleted? values ;
176 : read-records ( field-headers -- records )
177 [ read1 dup { 0x1a f } member? not ]
181 [ type>> parse-field ] bi
185 TUPLE: dbf file-header field-headers records ;
187 : load-dbf ( path -- dbf )
191 over header-length>> seek-absolute seek-input
192 over language-driver>> dbf-encodings at dbf-encoding [
193 dup read-records dbf boa
197 : seek-record ( n file-header -- )
198 [ record-length>> * ] [ header-length>> ] bi +
199 seek-absolute seek-input ;
201 : parse-string ( byte-array -- string )
202 [ " \0" member? ] trim-tail dbf-encoding get decode ;
204 : parse-date ( byte-array -- date/f )
205 dup [ " \0" member? ] all? [ drop f ] [
206 binary [ (ymd>timestamp) ] with-byte-reader
209 : parse-float ( byte-array -- n )
210 [ "\r\n\t *" member? ] trim string>number ;
212 : parse-int ( byte-array -- n )
213 dup length 4 assert= le> ;
215 : parse-short ( byte-array -- n )
216 dup length 2 assert= le> ;
220 ERROR: illegal-logical value ;
222 : parse-logical ( byte-array -- n )
224 { [ dup "TtYy" member? ] [ drop t ] }
225 { [ dup "FfNn" member? ] [ drop f ] }
226 { [ dup "? " member? ] [ drop unknown ] }
230 : parse-numeric ( byte-array -- n )
231 [ "\r\n\t *" member? ] trim "," "." replace string>number ;
233 : parse-double ( byte-array -- n )
234 dup length 8 assert= le> bits>double ;
236 : parse-currency ( byte-array -- n )
237 dup length 8 assert= le> 10000 / ;
239 : parse-timestamp ( byte-array -- timestamp )
240 [ -4713 1 1 <date> ] dip 4 cut [ le> ] bi@
241 [ days time+ ] [ milliseconds time+ ] bi* ;
243 ERROR: unsupported-field-type type ;
245 : parse-field ( byte-array type -- data )
248 { CHAR: 2 [ parse-short ] }
249 { CHAR: 4 [ parse-int ] }
250 { CHAR: 8 [ parse-double ] }
251 { CHAR: C [ parse-string ] }
252 { CHAR: D [ parse-date ] }
253 { CHAR: F [ parse-float ] }
254 { CHAR: I [ parse-int ] }
255 { CHAR: L [ parse-logical ] }
256 { CHAR: N [ parse-numeric ] }
257 { CHAR: O [ parse-double ] }
258 { CHAR: V [ parse-string ] }
259 { CHAR: Y [ parse-currency ] }
260 { CHAR: @ [ parse-timestamp ] }
261 ! { CHAR: + [ parse-autoincrement ] }
262 ! { CHAR: M [ parse-memo ] }
263 ! { CHAR: T [ parse-datetime ] }
264 ! { CHAR: B [ parse-double? ] } ! (only on dbversion in [0x30, 0x31, 0x32])
265 ! { CHAR: G [ parse-general ] }
266 ! { CHAR: P [ parse-picture ] }
267 ! { CHAR: Q [ parse-varbinary ] }
268 [ unsupported-field-type ]
271 : dbase3-memo ( n path -- data )
273 512 * seek-absolute seek-input
276 dup [ B{ 0 0x1a } member? ] find drop
277 [ head f ] [ t ] if* [ append ] dip
281 LE-STRUCT: db4-memo-header
282 { reserved uint } ! B{ 0xff 0xff 0x08 0x08 }
285 : dbase4-memo ( n path -- data )
287 512 * seek-absolute seek-input
288 db4-memo-header read-struct length>> read
291 BE-STRUCT: vfp-file-header
295 { reserved2 uchar[504] } ;
297 BE-STRUCT: vfp-memo-header
301 CONSTANT: vfp-memo-types H{
302 { 0x0 "picture memo" }
304 { 0x2 "object memo" }
307 : vfp-memo ( n path -- data )
309 vfp-file-header read-struct blocksize>> *
310 seek-absolute seek-input
311 vfp-memo-header read-struct length>> read