extra/zoneinfo/zoneinfo.factor

   1 ! Copyright (C) 2009 Doug Coleman.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: accessors arrays ascii assocs assocs.extras calendar
   4 calendar.english combinators combinators.short-circuit
   5 combinators.smart countries grouping interval-maps
   6 io.encodings.utf8 io.files kernel math math.parser namespaces
   7 sequences sequences.extras sorting splitting splitting.extras ;
   8 QUALIFIED: sets
   9 IN: zoneinfo
  10
  11 CONSTANT: zoneinfo-paths
  12 { ! tz data files read by zoneinfo-files and zoneinfo-assoc, in this order
  13     "vocab:zoneinfo/africa"
  14     "vocab:zoneinfo/antarctica"
  15     "vocab:zoneinfo/asia"
  16     "vocab:zoneinfo/australasia"
  17     "vocab:zoneinfo/europe"
  18     "vocab:zoneinfo/northamerica"
  19     "vocab:zoneinfo/pacificnew"
  20     "vocab:zoneinfo/southamerica"
  21     "vocab:zoneinfo/backzone"
  22 }
  23
  24 CONSTANT: zoneinfo-extra-paths
  25 { ! further data files; no word visible in this file reads this list
  26     "vocab:zoneinfo/backward"
  27     "vocab:zoneinfo/etcetera"
  28     "vocab:zoneinfo/factory"
  29     "vocab:zoneinfo/leapseconds"
  30     "vocab:zoneinfo/systemv"
  31 }
  32
  33 : zoneinfo-lines ( path -- seq ) ! lines of a UTF-8 data file that carry content
  34     utf8 file-lines harvest ! drop empty lines
  35     [ "#" head? ] reject ; ! drop lines that start with "#"
  36
  37 TUPLE: zonetab codes lat lng tz comments ; ! one row of zone1970.tab
  38 : <zonetab> ( codes lat lng tz comments -- zonetab ) zonetab boa ;
  39
  40 MEMO: zoneinfo-country-zones ( -- seq ) ! one zonetab per content line of zone1970.tab
  41     "vocab:zoneinfo/zone1970.tab" zoneinfo-lines
  42     [
  43         "\t" split ?first4 ! four tab-separated fields: codes, coordinates, tz, comments
  44         [ "," split ] 3dip ! the codes field is a comma-separated list
  45         [ "-+" split* first4 [ append ] 2dip append ] 2dip ! coordinates: four pieces joined pairwise into lat and lng (presumably sign + digits; confirm split*)
  46         <zonetab>
  47     ] { } map-as ; ! result is an array; MEMO: caches it after the first call
  48
  49 : parse-zonetabs ( -- seq ) ! flat sequence of { code tz } pairs
  50     zoneinfo-country-zones ! a row listing several codes yields one pair per code
  51     [ [ tz>> ] [ codes>> ] bi [ swap 2array ] with map ] map concat ;
  52
  53 : lookup-country-name ( seq -- seq' ) alpha-2 swap ?of drop ; inline ! a key missing from alpha-2 is returned unchanged
  54 : lookup-country-names ( seq -- seq' ) [ alpha-2 ?at drop ] map ; ! the same lookup applied to every element
  55
  56 : timezone>country-map ( -- alist ) ! groups the { code tz } pairs by their second element (tz)
  57     parse-zonetabs [ second ] collect-key-by ; ! presumably tz -> country codes; confirm collect-key-by semantics
  58
  59 : country>timezones-map ( -- alist ) ! groups the { code tz } pairs by their first element (code)
  60     parse-zonetabs [ first ] collect-value-by ; ! presumably code -> time zones; confirm collect-value-by semantics
  61
  62 : country-timezones-map ( -- alist ) ! each value sequence is zipped with its lookup-country-names result
  63     country>timezones-map [ [ ] [ lookup-country-names ] bi zip ] map-values ;
  64
  65 TUPLE: raw-zone name gmt-offset rules/save format until ; ! built by parse-zone and parse-rest-of-zone
  66 TUPLE: raw-rule name from to type in on at-time save letters ; ! built by parse-rule
  67 TUPLE: raw-link from to ; ! built by parse-link
  68 TUPLE: raw-leap year month day hms corr r/s ; ! built by parse-leap
  69
  70 ! TUPLE: zone name ;
  71 ! TUPLE: rule name from to at-time ;
  72
  73 ! : rule-to ( m string -- m n )
  74 !     {
  75 !         { "only" [ dup ] }
  76 !         { "max" [ 1/0. ] }
  77 !         [ string>number ]
  78 !     } case ;
  79
  80 : parse-rule ( seq -- rule ) ! ten fields: the line keyword, then the nine raw-rule slots
  81     ! input<sequence infers that the quotation consumes ten values and
  82     ! unpacks that many elements; nip then discards the leading keyword.
  83     [ raw-rule boa nip ] input<sequence ;
  84
  85 : parse-link ( seq -- link ) ! three fields: the line keyword, then from and to
  86     ! input<sequence infers that the quotation consumes three values and
  87     ! unpacks that many elements; nip then discards the leading keyword.
  88     [ raw-link boa nip ] input<sequence ;
  89
  90 : parse-leap ( seq -- leap ) ! seven fields: the line keyword, then the six raw-leap slots
  91     ! input<sequence infers that the quotation consumes seven values and
  92     ! unpacks that many elements; nip then discards the leading keyword.
  93     [ raw-leap boa nip ] input<sequence ;
  94
  95 : parse-zone ( seq -- zone )
  96     ! Element 0 is the line keyword and is not stored. Elements 1-4 fill
  97     ! name, gmt-offset, rules/save and format; the non-empty elements
  98     ! from index 5 onwards become the until slot.
  99     [ [ second ] [ third ] bi ]
 100     [ [ fourth ] [ 4 swap nth ] bi ]
 101     [ 5 tail harvest ] tri
 102     raw-zone boa ;
 103
 104 : parse-rest-of-zone ( prev seq -- zone )
 105     ! A line without keyword or name: the name is taken from prev, and
 106     ! the fields start at index 0 instead of index 2.
 107     [ name>> ] dip
 108     [ first3 ]            ! gmt-offset, rules/save, format
 109     [ 3 tail harvest ]    ! non-empty remainder becomes until
 110     bi raw-zone boa ;
 111
 112 : parse-zoneinfo-line ( prev/f seq -- tuple )
 113     dup first >lower ! dispatch on the lower-cased first field
 114     {
 115         { "zone" [ nip parse-zone ] }
 116         { "rule" [ nip parse-rule ] }
 117         { "link" [ nip parse-link ] }
 118         { "leap" [ nip parse-leap ] }
 119         [ drop harvest parse-rest-of-zone ] ! any other line is parsed against prev
 120     } case ;
 121
 122 : parse-zoneinfo-file ( path -- seq ) ! parsed records of one data file
 123     zoneinfo-lines
 124     [ "\t " split harvest ] map [ empty? ] reject ! split on tab/space; rows with no fields are dropped
 125     [ parse-zoneinfo-line ] map-with-previous ;
 126
 127 MEMO: zoneinfo-files ( -- seq ) ! one parsed record sequence per zoneinfo-paths entry
 128     zoneinfo-paths [ parse-zoneinfo-file ] { } map-as ;
 129
 130 ! Every parsed record from all zoneinfo-files results, concatenated into one sequence.
 131 MEMO: zoneinfo-array ( -- seq ) zoneinfo-files concat ;
 132
 133 MEMO: zoneinfo-assoc ( -- assoc ) ! alist pairing each path with its parsed records
 134     zoneinfo-paths dup [ parse-zoneinfo-file ] map zip ;
 135
 136 : raw-rule-map ( -- assoc ) ! rule name -> every raw-rule carrying that name
 137     zoneinfo-array [ raw-rule? not ] reject [ name>> ] collect-by ;
 138
 139 : current-rule-map ( -- assoc ) ! rule name -> rules whose to slot is "max"
 140     raw-rule-map
 141     [ [ to>> "max" = ] filter ] map-values ! keys are left untouched
 142     harvest-values ; ! presumably drops names whose filtered list is empty
 143
 144 : raw-zone-map ( -- assoc ) ! zone name -> every raw-zone line carrying that name
 145     zoneinfo-array [ raw-zone? not ] reject [ name>> ] collect-by ;
 146
 147 : zoneinfo-zones ( -- seq ) ! zone names: those containing "/" first, then the rest, each group sorted
 148     raw-zone-map keys
 149     [ CHAR: / swap member? ] partition
 150     [ natural-sort ] bi@ append ;
 151
 152 GENERIC: zone-matches? ( string rule -- ? ) ! does the record answer to the given string?
 153
 154 M: raw-zone zone-matches? name>> = ;
 155 M: raw-rule zone-matches? name>> = ;
 156 M: raw-link zone-matches? from>> = ; ! links are compared on their from slot
 157 M: raw-leap zone-matches? 2drop f ; ! leap records never match
 158
 159 : find-rules ( name -- rules ) ! rules stored under a rule name whose to slot is "max"; f if the name is unknown
 160     raw-rule-map at ! look up the one name first instead of filtering every rule set in the map
 161     dup [ [ to>> "max" = ] filter ] when ;
 162
 163 ERROR: zone-not-found name ;
 164
 165 : find-zone ( timezone -- zone ) ! last raw-zone line stored for the name; throws zone-not-found otherwise
 166     raw-zone-map ?at ! look up this one zone rather than mapping last over every entry first
 167     [ last ] [ zone-not-found ] if ;
 168
 169 ! Despite the name, this returns the raw-zone-map entry for the zone (its raw-zone lines), or f.
 170 : timezone>rules ( timezone -- rules ) raw-zone-map at ;
 171
 172 : find-zone-rules ( timezone -- zone rules ) ! the zone plus the rules named by its rules/save slot
 173     find-zone [ ] [ rules/save>> find-rules ] bi ;
 174
 175 : zone-abbrevs ( -- assoc ) ! each zone name with the distinct strings produced from its format
 176     zoneinfo-zones [
 177         find-zone-rules
 178         [ format>> ] dip ! stack is now: format rules
 179         [
 180             letters>> dup { "D" "S" } member? [ drop "" ] unless ! letters other than "D"/"S" are replaced by ""
 181             swap "%" split1 ! split the format at its first "%"
 182             [ 1 tail surround ] [ nip ] if* ! with "%": letters go between the prefix and the text after the character following "%"; without: the format itself
 183         ] with V{ } map-as sets:members ! one string per rule, duplicates removed
 184     ] zip-with ; ! presumably pairs every zone name with its result; confirm zip-with
 185
 186 : number>value ( n -- n' ) ! "max" -> t, "min" and "only" -> f, anything else via string>number
 187     {
 188         { "max" [ t ] }
 189         { "min" [ f ] }
 190         { "only" [ f ] }
 191         [ string>number ]
 192     } case ;
 193
 194 : on>value ( n -- n' )
 195     ! Accepts "3", "Thu>=8" (only a ">" at index 3 is recognised) and "lastFri".
 196     {
 197         { [ 3 over ?nth CHAR: > = ] [ ! three-letter day, two-character operator, number
 198             3 cut 2 tail [ day-abbreviation3-predicate ] [ string>number ] bi* 2array
 199         ] }
 200         { [ dup "last" head? ] [ "last" ?head drop day-abbreviation3-index ] }
 201         [ string>number ]
 202     } cond ;
 203
 204 ! Set the month slot to the value month-abbreviation-index gives for the abbreviation.
 205 : zone-month ( timestamp month -- timestamp' ) month-abbreviation-index >>month ;
 206
 207 ERROR: unknown-day-abbrev day ;
 208 : day-abbrev>= ( timestamp day -- timestamp' ) ! apply the ...>= word matching a three-letter day
 209     {
 210         { "Mon" [ monday>= ] }
 211         { "Tue" [ tuesday>= ] }
 212         { "Wed" [ wednesday>= ] }
 213         { "Thu" [ thursday>= ] }
 214         { "Fri" [ friday>= ] }
 215         { "Sat" [ saturday>= ] }
 216         { "Sun" [ sunday>= ] }
 217         [ unknown-day-abbrev ] ! any other string is thrown as unknown-day-abbrev
 218     } case ;
 219
 220 : day-abbrev<= ( timestamp day -- timestamp' ) ! apply the ...<= word matching a three-letter day
 221     {
 222         { "Mon" [ monday<= ] }
 223         { "Tue" [ tuesday<= ] }
 224         { "Wed" [ wednesday<= ] }
 225         { "Thu" [ thursday<= ] }
 226         { "Fri" [ friday<= ] }
 227         { "Sat" [ saturday<= ] }
 228         { "Sun" [ sunday<= ] }
 229         [ unknown-day-abbrev ] ! any other string is thrown as unknown-day-abbrev
 230     } case ;
 231
 232 : comparison-day-string ( timestamp string -- timestamp ) ! "Sun>=8", "Sun<=25" or a plain day number
 233     {
 234         { [ dup ">=" swap subseq? ] [ ">=" split1 string>number swap [ >>day ] dip day-abbrev>= ] }
 235         { [ dup "<=" swap subseq? ] [ "<=" split1 string>number swap [ >>day ] dip day-abbrev<= ] }
 236         [ string>number >>day ] ! no operator: the string is the day of month
 237     } cond ;
 238
 239 ERROR: unknown-last-day string ;
 240
 241 : last-day-string ( timestamp string -- timestamp ) ! apply the last-...-of-month word named by "lastXxx"
 242     {
 243         { "lastMon" [ last-monday-of-month ] }
 244         { "lastTue" [ last-tuesday-of-month ] }
 245         { "lastWed" [ last-wednesday-of-month ] }
 246         { "lastThu" [ last-thursday-of-month ] }
 247         { "lastFri" [ last-friday-of-month ] }
 248         { "lastSat" [ last-saturday-of-month ] }
 249         { "lastSun" [ last-sunday-of-month ] }
 250         [ unknown-last-day ] ! any other string is thrown as unknown-last-day
 251     } case ;
 252
 253 !  "lastFri" | "Fri<=1" | "Sat>=2" | "15"
 254 : zone-day ( timestamp text -- timestamp' )
 255     ! Text starting with "last" goes to last-day-string; everything else
 256     ! (a day number or a >= / <= form) goes to comparison-day-string.
 257     dup "last" head?
 258     [ last-day-string ]
 259     [ comparison-day-string ] if ;
 260
 261 ! Parse the string as a number and hand it to <year-gmt>.
 262 : string>year ( str -- year ) string>number <year-gmt> ;
 263
 264 : rule-year>years ( raw-rule -- from to ) ! from/to slots as year timestamps; "min" -> -1/0., "max" -> 1/0.
 265     [ from>> ] [ to>> ] bi
 266     {
 267         { [ over "min" = ] [ nip string>year -1/0. swap ] }
 268         { [ dup "max" = ] [ drop string>year 1/0. ] }
 269         { [ dup "only" = ] [ drop dup [ string>year ] bi@ ] } ! "only": both bounds come from the from slot, as two separate timestamps
 270         [ [ string>year ] bi@ ]
 271     } cond ;
 272
 273 ! Split on ":" and convert each piece to a number; missing trailing fields are padded with 0.
 274 : parse-hms ( str -- hms-seq ) ":" split [ string>number ] map 3 0 pad-tail ;
 275
 276 : parse-offset ( str -- hms-seq ) ! parse-hms, with every field negated when the string starts with "-"
 277     "-" ?head swap parse-hms swap [ [ neg ] map ] when ;
 278
 279 ! XXX: Don't just drop the s/u, e.g. 2:00:00s
 280 : zone-time ( timestamp time -- timestamp' ) ! set the time of day from a string such as "2:00:00s"
 281     [ Letter? ] split-tail drop ! the trailing letter part is split off and discarded
 282     parse-offset first3 set-time ; ! the three parsed fields are passed to set-time
 283
 284 : hm>duration ( str -- duration ) ! "h" or "h:m"; a missing minute field is read as "0"
 285     ":" split1 "0" or [ string>number ] bi@
 286     instant -rot 0 set-time ; ! start from instant, seconds fixed at 0
 287
 288 : rule>timestamp-rest ( timestamp rule -- timestamp' )
 289     ! Apply the rule's in (month), on (day) and at-time slots to a year timestamp.
 290     ! An infinite bound (-1/0. or 1/0.) has no fields to set and is returned unchanged.
 291     over fp-infinity? [ drop ] [
 292         { [ in>> zone-month ] [ on>> zone-day ] [ at-time>> zone-time ] } cleave
 293     ] if ;
 294
 295 : rule>timestamps ( zone -- from to ) ! both bounds of a raw-rule, completed by rule>timestamp-rest
 296     [ rule-year>years ] keep
 297     ! the same rule is applied to the from bound first, then to the to bound
 298     [ rule>timestamp-rest ] curry bi@ ;
 299
 300 : until>timestamp ( seq -- unix-time ) ! an empty until sequence yields 1/0.
 301     dup empty? [ drop 1/0. ] [
 302         4 f pad-tail first4 { ! year, then month/day/time, each f when absent
 303             [ string>year ]
 304             [ [ zone-month ] when* ]
 305             [ [ zone-day ] when* ]
 306             [ [ zone-time ] when* ]
 307         } spread timestamp>unix-time
 308     ] if ;
 309
 310 : raw-zones>interval-map ( raw-zones -- interval-map )
 311     ! Each zone line covers the span from the previous line's until value
 312     ! (-1/0. for the first line) up to its own until value minus 1.
 313     [ [ until>> until>timestamp ] map -1/0. prefix 2 <clumps> ]
 314     [ [ [ first2 1 - 2array ] map ] dip zip ] bi
 315     <interval-map> ;
 316
 317 ! Interval map built from the raw-zone lines stored under the given zone name.
 318 : timezone>interval-map ( timezone-name -- interval-map ) raw-zone-map at raw-zones>interval-map ;
 319
 320 : gmt-offset ( timestamp timezone-name -- gmt-offset ) ! value stored in the zone's interval map for the timestamp
 321     [ timestamp>unix-time ] ! the interval keys are built from unix times
 322     [ timezone>interval-map ] bi* interval-at ; ! resolve the name first: raw-zones>interval-map needs raw-zone tuples, not a name
 323
 324 : name>rules ( name -- rules ) ! pairs { { from to } { save letters } } with each raw-rule stored under name
 325     raw-rule-map at [
 326         [
 327             [ rule>timestamps [ dup fp-infinity? [ timestamp>unix-time ] unless ] bi@ 2array ] ! bounds as unix times; infinite bounds are kept as they are
 328             [ [ save>> hm>duration ] [ letters>> ] bi 2array ] bi 2array ! save as a duration, plus the letters slot
 329         ] map
 330     ] keep zip ; ! summaries first, original rules second
 331
 332 : chicago-zones ( -- interval-map ) "America/Chicago" raw-zone-map at raw-zones>interval-map ;
 333 : us-rules ( -- rules ) "US" name>rules ; ! rules stored under the rule name "US"
 334 : us-timezones ( -- timezones ) ! entry stored under "US" in country>timezones-map
 335     "US" country>timezones-map at ;
 336
 337 : puerto-rico-timezone-countries ( -- countries ) ! timezone>country-map entry for the zone, passed through lookup-country-names
 338     "America/Puerto_Rico" timezone>country-map at lookup-country-names ;