USING: unicode.data kernel math sequences parser lexer
bit-arrays namespaces make sequences.private arrays quotations
assocs classes.predicate math.order eval ;
! Character classes (Unicode general categories)
: category# ( char -- category )
    ! Map a code point to the index of its general category in
    ! `categories`.
    !
    ! Code points covered by category-map use the table entry.
    ! Anything the table does not cover falls back to range checks:
    !   U+E0001..U+E007F -> 26 (Cf)
    !   U+E0100..U+E01EF -> 5  (Mn)
    !   everything else  -> 29 (Cn)
    !
    ! There are a few characters that should be Cn
    ! that this gives Cf or Mn.
    ! Use a compressed array instead?
    !
    ! NOTE(review): the `[ drop 26 ]` branch and the closing
    ! `] if ] ?if ;` were missing from the listing and are restored
    ! here from the Cf = 26 note -- confirm against the upstream file.
    dup category-map ?nth [ ] [
        dup HEX: E0001 HEX: E007F between?
        [ drop 26 ] [
            HEX: E0100 HEX: E01EF between? 5 29 ?
        ] if
    ] ?if ;
: category ( char -- category )
    ! Name of the character's general category: the index computed
    ! by category# is looked up in the `categories` sequence.
    category# categories nth ;
: >category-array ( categories -- bitarray )
    ! Build a bit array parallel to `categories`: element i is set
    ! when the i-th category name is a member of the input sequence.
    categories [ swap member? ] with map >bit-array ;
: as-string ( strings -- string )
    ! Concatenate the strings, wrap the result in double quotes and
    ! evaluate it as a Factor string literal, so that escape
    ! sequences written in the tokens are processed by the parser.
    !
    ! NOTE(review): the text is run through eval; a token containing
    ! an unescaped double quote would end the literal early and the
    ! remainder would be evaluated as code. Inputs come from source
    ! files via parse-tokens -- confirm that is the only caller.
    concat "\"" tuck 3append eval ;
: [category] ( categories -- quot )
    ! Build a quotation ( char -- ? ) of the shape
    !   dup category# BITS nth-unsafe [ drop t ] [ CHARS member? ] if
    ! BITS is the bit array for the tokens that are category names;
    ! CHARS is the string spelled by all the remaining tokens.
    !
    ! NOTE(review): the enclosing `[ ... ] [ ] make` and the final
    ! `\ if ,` were missing from the listing and are restored here;
    ! `%` and `,` need a make scope and the two branch quotations
    ! need a conditional -- confirm against the upstream file.
    [
        ! Tokens that are not category names become literal characters.
        [ [ categories member? not ] filter as-string ] keep
        ! Tokens that are category names become the lookup bit array.
        [ categories member? ] filter >category-array
        [ dup category# ] % , [ nth-unsafe [ drop t ] ] %
        \ member? 2array >quotation ,
        \ if ,
    ] [ ] make ;
: define-category ( word categories -- )
    ! Define `word` as a predicate class with superclass integer;
    ! its membership test is the quotation built by [category].
    [category] integer swap define-predicate-class ;
! Parsing word: creates a word from the next token, reads tokens
! up to ";" and defines the word as a category predicate class
! over those tokens.
!
! NOTE(review): the `: NAME` header line was absent from the
! listing; CATEGORY: is assumed -- confirm the intended name.
: CATEGORY:
    CREATE ";" parse-tokens define-category ; parsing
: seq-minus ( seq1 seq2 -- diff )
    ! Elements of seq1 that are not members of seq2, kept in
    ! seq1 order.
    [ member? not ] curry filter ;
! Parsing word: creates a word from the next token, reads tokens
! up to ";" and defines the word as a category predicate class
! over every category name that is NOT among those tokens.
! Tokens that are not category names have no effect, because the
! result is filtered out of `categories`.
!
! NOTE(review): the `: NAME` header line was absent from the
! listing; CATEGORY-NOT: is assumed -- confirm the intended name.
: CATEGORY-NOT:
    CREATE ";" parse-tokens
    categories swap seq-minus define-category ; parsing