]> gitweb.factorcode.org Git - factor.git/blob - basis/english/english.factor
unicode: make this the API for all unicode things.
[factor.git] / basis / english / english.factor
1 ! Copyright (C) 2015 John Benediktsson
2 ! See http://factorcode.org/license.txt for BSD license
3 USING: assocs assocs.extras combinators formatting kernel literals
4 locals math math.parser sequences splitting unicode ;
5
6 IN: english
7
8 <PRIVATE
9
10 <<
11 ! Irregular pluralizations
12 CONSTANT: singular-to-plural H{
13
14     ! us -> i
15     { "alumnus" "alumni" }
16     { "cactus" "cacti" }
17     { "focus" "foci" }
18     { "fungus" "fungi" }
19     { "nucleus" "nuclei" }
20     { "radius" "radii" }
21     { "stimulus" "stimuli" }
22
23     ! is -> es
24     { "analysis" "analyses" }
25     { "axis" "axes" }
26     { "basis" "bases" }
27     { "crisis" "crises" }
28     { "diagnosis" "diagnoses" }
29     { "ellipsis" "ellipses" }
30     { "hypothesis" "hypotheses" }
31     { "oasis" "oases" }
32     { "paralysis" "paralyses" }
33     { "parenthesis" "parentheses" }
34     { "synopsis" "synopses" }
35     { "synthesis" "syntheses" }
36     { "thesis" "theses" }
37
38     ! ix -> ices
39     { "appendix" "appendices" }
40     { "index" "indices" }
41     { "matrix" "matrices" }
42
43     ! eau -> eaux
44     { "beau" "beaux" }
45     { "bureau" "bureaus" }
46     { "tableau" "tableaux" }
47
48     ! ? -> en
49     { "child" "children" }
50     { "man" "men" }
51     { "ox" "oxen" }
52     { "woman" "women" }
53
54     ! ? -> a
55     { "bacterium" "bacteria" }
56     { "corpus" "corpora" }
57     { "criterion" "criteria" }
58     { "curriculum" "curricula" }
59     { "datum" "data" }
60     { "genus" "genera" }
61     { "medium" "media" }
62     { "memorandum" "memoranda" }
63     { "phenomenon" "phenomena" }
64     { "stratum" "strata" }
65
66     ! no change
67     { "bison" "bison" }
68     { "deer" "deer" }
69     { "fish" "fish" }
70     { "means" "means" }
71     { "moose" "moose" }
72     { "offspring" "offspring" }
73     { "series" "series" }
74     { "sheep" "sheep" }
75     { "species" "species" }
76     { "swine" "swine" }
77
78     ! oo -> ee
79     { "foot" "feet" }
80     { "goose" "geese" }
81     { "tooth" "teeth" }
82
83     ! a -> ae
84     { "antenna" "antennae" }
85     { "formula" "formulae" }
86     { "nebula" "nebulae" }
87     { "vertebra" "vertebrae" }
88     { "vita" "vitae" }
89
90     ! ouse -> ice
91     { "louse" "lice" }
92     { "mouse" "mice" }
93 }
94 >>
95
96 CONSTANT: plural-to-singular $[ singular-to-plural assoc-invert ]
97
98 :: match-case ( master disciple -- master' )
99     {
100         { [ master >lower master = ] [ disciple >lower ] }
101         { [ master >upper master = ] [ disciple >upper ] }
102         { [ master >title master = ] [ disciple >title ] }
103         [ disciple ]
104     } cond ;
105
106 PRIVATE>
107
108 : singularize ( word -- singular )
109     dup >lower {
110         { [ dup empty? ] [ ] }
111         { [ dup singular-to-plural key? ] [ ] }
112         { [ plural-to-singular ?at ] [ ] }
113         { [ dup "s" tail? not ] [ ] }
114         {
115             [
116                 dup "ies" ?tail [
117                     last "aeiou" member? not
118                 ] [ drop f ] if
119             ] [ 3 head* "y" append ]
120         }
121         { [ dup "es" tail? ] [ 2 head* ] }
122         [ but-last ]
123     } cond match-case ;
124
125 : pluralize ( word -- plural )
126     dup >lower {
127         { [ dup empty? ] [ ] }
128         { [ dup plural-to-singular key? ] [ ] }
129         { [ singular-to-plural ?at ] [ ] }
130         {
131             [
132                 dup "y" ?tail [
133                     last "aeiou" member? not
134                 ] [ drop f ] if
135             ] [ but-last "ies" append ]
136         }
137         {
138             [ dup { "s" "ch" "sh" } [ tail? ] with any? ]
139             [ dup "es" tail? [ "es" append ] unless ]
140         }
141         [ "s" append ]
142     } cond match-case ;
143
144 : count-of-things ( count word -- str )
145     over 1 = [ pluralize ] unless "%d %s" sprintf ;
146
147 : a10n ( str -- str' )
148     dup length 3 > [
149         [ 1 head ] [ length 2 - number>string ] [ 1 tail* ] tri
150         3append
151     ] when ;