From: John Benediktsson Date: Thu, 31 Mar 2016 06:29:48 +0000 (-0700) Subject: unicode: make this the API for all unicode things. X-Git-Tag: unmaintained~1289 X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=commitdiff_plain;h=498285d7dd66d0e6875ef774e0b1c28cca1aa110 unicode: make this the API for all unicode things. --- diff --git a/basis/alien/libraries/finder/linux/linux.factor b/basis/alien/libraries/finder/linux/linux.factor index 406cb9a41e..d21b1b0dd1 100644 --- a/basis/alien/libraries/finder/linux/linux.factor +++ b/basis/alien/libraries/finder/linux/linux.factor @@ -3,7 +3,7 @@ USING: alien.libraries.finder arrays assocs combinators.short-circuit io io.encodings.utf8 io.files io.files.info io.launcher kernel sequences sets splitting system -unicode.categories ; +unicode ; IN: alien.libraries.finder.linux format-float ; IN: formatting diff --git a/basis/ftp/server/server.factor b/basis/ftp/server/server.factor index 5d10a125ff..55b8218e74 100644 --- a/basis/ftp/server/server.factor +++ b/basis/ftp/server/server.factor @@ -8,7 +8,7 @@ io.files io.files.info io.files.types io.pathnames io.servers io.sockets io.streams.string io.timeouts kernel logging math math.bitwise math.parser namespaces sequences simple-tokenizer splitting strings threads -tools.files unicode.case ; +tools.files unicode ; IN: ftp.server SYMBOL: server diff --git a/basis/globs/globs.factor b/basis/globs/globs.factor index 285578073d..7e86708ba4 100644 --- a/basis/globs/globs.factor +++ b/basis/globs/globs.factor @@ -3,7 +3,7 @@ USING: accessors arrays combinators combinators.short-circuit io.directories io.files io.files.info io.pathnames kernel locals make peg.ebnf regexp regexp.combinators sequences splitting -strings system unicode.case ; +strings system unicode ; IN: globs : not-path-separator ( -- sep ) diff --git a/basis/help/apropos/apropos.factor b/basis/help/apropos/apropos.factor index 6c48041537..1e79727130 100644 --- a/basis/help/apropos/apropos.factor +++ b/basis/help/apropos/apropos.factor @@ -3,8 +3,7 @@ USING: accessors arrays assocs fry help.markup help.topics io kernel make math math.parser namespaces sequences sorting summary tools.completion vocabs.hierarchy help.vocabs -vocabs words unicode.case help unicode.categories -combinators locals ; +vocabs words unicode help combinators locals ; IN: help.apropos : $completions ( seq -- ) diff --git a/basis/help/lint/checks/checks.factor b/basis/help/lint/checks/checks.factor index 0133e3b7c8..f4db83d67d 100644 --- a/basis/help/lint/checks/checks.factor +++ b/basis/help/lint/checks/checks.factor @@ -5,8 +5,8 @@ classes.tuple combinators combinators.short-circuit debugger definitions effects eval formatting fry grouping help help.markup help.topics io io.streams.string kernel macros math namespaces sequences sequences.deep sets splitting strings -summary tools.destructors unicode.categories vocabs -vocabs.loader words words.constant words.symbol ; +summary tools.destructors unicode vocabs vocabs.loader words +words.constant words.symbol ; IN: help.lint.checks ERROR: simple-lint-error message ; diff --git a/basis/help/search/search.factor b/basis/help/search/search.factor index 9d6ff60107..87db04467f 100644 --- a/basis/help/search/search.factor +++ b/basis/help/search/search.factor @@ -4,8 +4,8 @@ USING: arrays assocs combinators combinators.short-circuit fry help help.apropos help.markup help.stylesheet help.topics io io.streams.string io.styles kernel math memoize namespaces -sequences sequences.deep sorting splitting strings unicode.case -unicode.categories words ; +sequences sequences.deep sorting splitting strings unicode +words ; IN: help.search diff --git a/basis/help/tutorial/tutorial.factor b/basis/help/tutorial/tutorial.factor index 6e9b40bf07..7d4611867b 100644 --- a/basis/help/tutorial/tutorial.factor +++ b/basis/help/tutorial/tutorial.factor @@ -1,7 +1,6 @@ USING: help.markup help.syntax ui.commands ui.operations editors vocabs.loader kernel sequences prettyprint tools.test -vocabs.refresh strings unicode.categories unicode.case -ui.tools.browser ui.tools.common ; +vocabs.refresh strings unicode ui.tools.browser ui.tools.common ; IN: help.tutorial ARTICLE: "first-program-start" "Creating a vocabulary for your first program" diff --git a/basis/html/templates/chloe/syntax/syntax.factor b/basis/html/templates/chloe/syntax/syntax.factor index 264559e2d8..5d02982610 100644 --- a/basis/html/templates/chloe/syntax/syntax.factor +++ b/basis/html/templates/chloe/syntax/syntax.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors sequences combinators kernel namespaces classes.tuple assocs splitting words arrays memoize parser lexer io io.files -io.encodings.utf8 io.streams.string unicode.case mirrors fry math urls +io.encodings.utf8 io.streams.string unicode mirrors fry math urls multiline xml xml.data xml.writer xml.syntax html.components html.templates ; IN: html.templates.chloe.syntax diff --git a/basis/http/server/dispatchers/dispatchers.factor b/basis/http/server/dispatchers/dispatchers.factor index 405d96d1f5..1534a5a8cd 100644 --- a/basis/http/server/dispatchers/dispatchers.factor +++ b/basis/http/server/dispatchers/dispatchers.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: kernel namespaces sequences assocs accessors splitting -unicode.case urls http http.server http.server.responses ; +unicode urls http http.server http.server.responses ; IN: http.server.dispatchers TUPLE: dispatcher default responders ; diff --git a/basis/http/server/server.factor b/basis/http/server/server.factor index d074fe4fcd..583920c49b 100644 --- a/basis/http/server/server.factor +++ b/basis/http/server/server.factor @@ -19,7 +19,7 @@ io.servers io.timeouts io.crlf fry logging logging.insomniac calendar urls -unicode.categories +unicode http http.server.requests http.server.responses diff --git a/basis/images/loader/gtk/gtk.factor b/basis/images/loader/gtk/gtk.factor index fbdab99f47..f0bf3ac03c 100644 --- a/basis/images/loader/gtk/gtk.factor +++ b/basis/images/loader/gtk/gtk.factor @@ -3,7 +3,7 @@ USING: accessors alien.c-types alien.data alien.syntax arrays assocs combinators destructors gdk.pixbuf.ffi glib.ffi gobject.ffi grouping images images.loader io kernel locals math sequences system -specialized-arrays unicode.case ; +specialized-arrays unicode ; IN: images.loader.gtk SPECIALIZED-ARRAY: uchar diff --git a/basis/io/directories/search/search.factor b/basis/io/directories/search/search.factor index e0df575bf0..79e2d9cd20 100644 --- a/basis/io/directories/search/search.factor +++ b/basis/io/directories/search/search.factor @@ -3,7 +3,7 @@ USING: accessors arrays assocs continuations deques dlists fry io.backend io.directories io.files.info io.pathnames kernel kernel.private locals math sequences sequences.extras sorting -strings system unicode.case vocabs ; +strings system unicode vocabs ; IN: io.directories.search : qualified-directory-entries ( path -- seq ) diff --git a/basis/io/sockets/secure/openssl/openssl.factor b/basis/io/sockets/secure/openssl/openssl.factor index d5346f01b4..21d026bd14 100644 --- a/basis/io/sockets/secure/openssl/openssl.factor +++ b/basis/io/sockets/secure/openssl/openssl.factor @@ -7,7 +7,7 @@ io.binary io.buffers io.encodings.8-bit.latin1 io.encodings.utf8 io.files io.pathnames io.ports io.sockets io.sockets.secure io.timeouts kernel libc locals math math.functions math.order math.parser memoize namespaces openssl openssl.libcrypto -openssl.libssl random sequences sets splitting unicode.case ; +openssl.libssl random sequences sets splitting unicode ; IN: io.sockets.secure.openssl GENERIC: ssl-method ( symbol -- method ) diff --git a/basis/io/standard-paths/windows/windows.factor b/basis/io/standard-paths/windows/windows.factor index 29547d43f2..b0870c22d2 100644 --- a/basis/io/standard-paths/windows/windows.factor +++ b/basis/io/standard-paths/windows/windows.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: environment fry io.directories.search.windows io.files io.pathnames io.standard-paths kernel sequences splitting -system unicode.case ; +system unicode ; IN: io.standard-paths.windows M: windows find-in-applications diff --git a/basis/math/statistics/statistics-docs.factor b/basis/math/statistics/statistics-docs.factor index df4a8c4d37..156877f329 100644 --- a/basis/math/statistics/statistics-docs.factor +++ b/basis/math/statistics/statistics-docs.factor @@ -105,7 +105,7 @@ HELP: histogram-by { $description "Returns a hashtable where the keys are the elements of the sequence binned by being passed through " { $snippet "quot" } ", and the values are the number of times members of each bin appeared in that sequence." } { $examples { $unchecked-example "! Count the number of times letters and non-letters appear in a sequence." - "USING: prettyprint math.statistics unicode.categories ;" + "USING: prettyprint math.statistics unicode ;" "\"aaa123bc\" [ letter? ] histogram-by ." "H{ { t 5 } { f 3 } }" } diff --git a/basis/models/search/search.factor b/basis/models/search/search.factor index 5ecb0fa34a..061acc7d9c 100644 --- a/basis/models/search/search.factor +++ b/basis/models/search/search.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008, 2009 Slava Pestov ! See http://factorcode.org/license.txt for BSD license. -USING: fry kernel models.arrow.smart sequences unicode.case ; +USING: fry kernel models.arrow.smart sequences unicode ; IN: models.search : ( values search quot -- model ) diff --git a/basis/peg/ebnf/ebnf-tests.factor b/basis/peg/ebnf/ebnf-tests.factor index c0c7568913..84311bc24e 100644 --- a/basis/peg/ebnf/ebnf-tests.factor +++ b/basis/peg/ebnf/ebnf-tests.factor @@ -3,7 +3,7 @@ ! USING: kernel tools.test peg peg.ebnf peg.ebnf.private words math math.parser sequences accessors peg.parsers parser -namespaces arrays strings eval unicode.data multiline ; +namespaces arrays strings eval unicode multiline ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ diff --git a/basis/peg/ebnf/ebnf.factor b/basis/peg/ebnf/ebnf.factor index 601804cfda..78639595eb 100644 --- a/basis/peg/ebnf/ebnf.factor +++ b/basis/peg/ebnf/ebnf.factor @@ -3,7 +3,7 @@ USING: accessors assocs combinators combinators.short-circuit effects kernel make math math.parser multiline namespaces parser peg peg.parsers quotations sequences sequences.deep splitting -stack-checker strings strings.parser summary unicode.categories +stack-checker strings strings.parser summary unicode vocabs.parser words ; FROM: vocabs.parser => search ; FROM: peg.search => replace ; diff --git a/basis/peg/parsers/parsers-docs.factor b/basis/peg/parsers/parsers-docs.factor index adac0cf65b..af308399ec 100644 --- a/basis/peg/parsers/parsers-docs.factor +++ b/basis/peg/parsers/parsers-docs.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Chris Double, Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: help.markup help.syntax kernel math sequences -unicode.categories strings ; +unicode strings ; IN: peg.parsers HELP: 1token diff --git a/basis/peg/parsers/parsers.factor b/basis/peg/parsers/parsers.factor index 3feddd3447..23b262568d 100644 --- a/basis/peg/parsers/parsers.factor +++ b/basis/peg/parsers/parsers.factor @@ -1,8 +1,7 @@ ! Copyright (C) 2007, 2008 Chris Double, Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel sequences strings namespaces make math assocs -vectors arrays math.parser accessors unicode.categories -sequences.deep peg peg.private peg.search math.ranges words ; +USING: accessors kernel make math math.parser math.ranges peg +peg.private peg.search sequences strings unicode vectors ; IN: peg.parsers TUPLE: just-parser p1 ; diff --git a/basis/peg/peg.factor b/basis/peg/peg.factor index 1ac261494b..f0545a17a3 100644 --- a/basis/peg/peg.factor +++ b/basis/peg/peg.factor @@ -3,8 +3,7 @@ USING: accessors arrays assocs classes combinators combinators.short-circuit compiler.units effects.parser fry generalizations kernel locals make math math.order namespaces -quotations sequences sets splitting unicode.categories vectors -words ; +quotations sequences sets splitting unicode vectors words ; IN: peg TUPLE: parse-result remaining ast ; diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index 71e70c60d9..e50d7bfccb 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -1,9 +1,8 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors kernel math math.order words combinators -combinators.smart combinators.short-circuit locals -unicode.categories sequences fry macros arrays assocs sets -classes unicode.script unicode.data ; +USING: accessors arrays assocs classes combinators +combinators.short-circuit combinators.smart fry kernel locals +math math.order sequences sets unicode unicode.data ; FROM: ascii => ascii? ; IN: regexp.classes diff --git a/basis/regexp/compiler/compiler.factor b/basis/regexp/compiler/compiler.factor index 45e17306de..274b9a5797 100644 --- a/basis/regexp/compiler/compiler.factor +++ b/basis/regexp/compiler/compiler.factor @@ -1,10 +1,9 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: regexp.classes kernel sequences regexp.negation -quotations assocs fry math locals combinators sets -accessors words compiler.units kernel.private strings -sequences.private arrays namespaces unicode.breaks -regexp.transition-tables combinators.short-circuit ; +USING: accessors assocs combinators combinators.short-circuit +fry kernel kernel.private locals math namespaces regexp.classes +regexp.transition-tables sequences sequences.private sets +strings unicode words ; IN: regexp.compiler GENERIC: question>quot ( question -- quot ) diff --git a/basis/regexp/dfa/dfa.factor b/basis/regexp/dfa/dfa.factor index 5c77741032..c65ae25a7a 100644 --- a/basis/regexp/dfa/dfa.factor +++ b/basis/regexp/dfa/dfa.factor @@ -1,8 +1,7 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays assocs combinators fry kernel locals -math math.order regexp.nfa regexp.transition-tables sequences -sets sorting vectors regexp.ast regexp.classes ; +USING: accessors arrays assocs fry kernel locals regexp.ast +regexp.classes regexp.transition-tables sequences sets vectors ; IN: regexp.dfa : find-delta ( states transition nfa -- new-states ) diff --git a/basis/regexp/disambiguate/disambiguate.factor b/basis/regexp/disambiguate/disambiguate.factor index f985b0e234..84864a2510 100644 --- a/basis/regexp/disambiguate/disambiguate.factor +++ b/basis/regexp/disambiguate/disambiguate.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel accessors regexp.classes math.bits assocs sequences -arrays sets regexp.dfa math fry regexp.minimize regexp.ast -locals regexp.transition-tables ; +USING: accessors arrays assocs fry kernel locals math math.bits +regexp.ast regexp.classes regexp.transition-tables sequences +sets ; IN: regexp.disambiguate TUPLE: parts in out ; diff --git a/basis/regexp/minimize/minimize.factor b/basis/regexp/minimize/minimize.factor index b2d636a498..65830ac720 100644 --- a/basis/regexp/minimize/minimize.factor +++ b/basis/regexp/minimize/minimize.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. -USING: kernel sequences regexp.transition-tables fry assocs -accessors locals math sorting arrays sets hashtables regexp.dfa -combinators.short-circuit regexp.classes ; +USING: accessors arrays assocs combinators.short-circuit fry +hashtables kernel locals math regexp.classes +regexp.transition-tables sequences sets sorting ; IN: regexp.minimize : table>state-numbers ( table -- assoc ) diff --git a/basis/regexp/negation/negation.factor b/basis/regexp/negation/negation.factor index 81ac83da0b..85b7741a52 100644 --- a/basis/regexp/negation/negation.factor +++ b/basis/regexp/negation/negation.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: regexp.nfa regexp.disambiguate kernel sequences -assocs regexp.classes hashtables accessors fry vectors -regexp.ast regexp.transition-tables regexp.minimize -regexp.dfa namespaces sets ; +USING: accessors assocs fry hashtables kernel namespaces +regexp.ast regexp.classes regexp.dfa regexp.disambiguate +regexp.minimize regexp.nfa regexp.transition-tables sequences +sets vectors ; IN: regexp.negation CONSTANT: fail-state -1 diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor index 864a1fedba..5cac3c229d 100644 --- a/basis/regexp/nfa/nfa.factor +++ b/basis/regexp/nfa/nfa.factor @@ -1,13 +1,11 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays assocs grouping kernel locals math namespaces -sequences fry quotations math.order math.ranges vectors -unicode.categories regexp.transition-tables words sets hashtables -combinators.short-circuit unicode.data regexp.ast -regexp.classes memoize ; +USING: accessors arrays assocs combinators.short-circuit fry +kernel locals math memoize namespaces regexp.ast regexp.classes +regexp.transition-tables sequences sets unicode vectors ; IN: regexp.nfa -! This uses unicode.data for ch>upper and ch>lower +! This uses unicode for ch>upper and ch>lower ! but case-insensitive matching should be done by case-folding everything ! before processing starts diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor index 01cff98901..e919ff6d2d 100644 --- a/basis/regexp/parser/parser.factor +++ b/basis/regexp/parser/parser.factor @@ -1,10 +1,9 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: peg.ebnf kernel math.parser sequences assocs arrays fry math -combinators regexp.classes strings splitting peg locals accessors -regexp.ast unicode.case unicode.script.private unicode.categories -memoize interval-maps sets unicode.data combinators.short-circuit -namespaces ; +USING: accessors arrays assocs combinators +combinators.short-circuit interval-maps kernel locals +math.parser memoize peg.ebnf regexp.ast regexp.classes sequences +sets splitting strings unicode unicode.data unicode.script ; IN: regexp.parser : allowed-char? ( ch -- ? ) diff --git a/basis/regexp/transition-tables/transition-tables.factor b/basis/regexp/transition-tables/transition-tables.factor index b548b883b2..4352ba9979 100644 --- a/basis/regexp/transition-tables/transition-tables.factor +++ b/basis/regexp/transition-tables/transition-tables.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays assocs fry hashtables kernel sequences -vectors locals regexp.classes sets ; +USING: accessors arrays assocs fry hashtables kernel locals +regexp.classes sequences sets vectors ; IN: regexp.transition-tables TUPLE: transition-table transitions start-state final-states ; diff --git a/basis/roman/roman.factor b/basis/roman/roman.factor index 346226bf4e..fe22796346 100644 --- a/basis/roman/roman.factor +++ b/basis/roman/roman.factor @@ -4,7 +4,7 @@ USING: accessors arrays assocs combinators.smart effects effects.parser fry generalizations grouping kernel lexer macros math math.order math.vectors namespaces parser quotations sequences sequences.private splitting.monotonic stack-checker -strings unicode.case words ; +strings unicode words ; IN: roman load ; IN: tools.test diff --git a/basis/ui/backend/cocoa/views/views.factor b/basis/ui/backend/cocoa/views/views.factor index 2f8bcbd872..565c5acbaa 100644 --- a/basis/ui/backend/cocoa/views/views.factor +++ b/basis/ui/backend/cocoa/views/views.factor @@ -7,7 +7,7 @@ cocoa.views combinators core-foundation.strings core-graphics core-graphics.types core-text io.encodings.utf8 kernel literals locals math math.rectangles namespaces opengl sequences threads ui.gadgets ui.gadgets.private ui.gadgets.worlds ui.gestures -ui.private unicode.case ; +ui.private unicode ; IN: ui.backend.cocoa.views : send-mouse-moved ( view event -- ) diff --git a/basis/ui/commands/commands.factor b/basis/ui/commands/commands.factor index 713a3b1204..3497f677c0 100644 --- a/basis/ui/commands/commands.factor +++ b/basis/ui/commands/commands.factor @@ -1,8 +1,7 @@ ! Copyright (C) 2006, 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors assocs fry help.markup kernel make quotations -sequences splitting tr ui.gestures unicode.case unicode.categories -words ; +sequences splitting tr ui.gestures unicode words ; IN: ui.commands SYMBOL: +nullary+ diff --git a/basis/ui/gadgets/editors/editors.factor b/basis/ui/gadgets/editors/editors.factor index 0a2caefd32..f040136c8b 100644 --- a/basis/ui/gadgets/editors/editors.factor +++ b/basis/ui/gadgets/editors/editors.factor @@ -9,7 +9,7 @@ sorting splitting timers ui.baseline-alignment ui.clipboards ui.commands ui.gadgets ui.gadgets.borders ui.gadgets.line-support ui.gadgets.menus ui.gadgets.scrollers ui.gadgets.theme ui.gestures ui.pens.solid ui.render ui.text -unicode.categories ; +unicode ; EXCLUDE: fonts => selection ; IN: ui.gadgets.editors diff --git a/basis/unicode/breaks/breaks-docs.factor b/basis/unicode/breaks/breaks-docs.factor index eb8c2eb00c..b9d5dbb709 100644 --- a/basis/unicode/breaks/breaks-docs.factor +++ b/basis/unicode/breaks/breaks-docs.factor @@ -1,4 +1,4 @@ -USING: help.syntax help.markup strings ; +USING: help.syntax help.markup strings unicode ; IN: unicode.breaks ABOUT: "unicode.breaks" diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index 2bb0f98ba4..1aa5ec3000 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -1,11 +1,8 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien.syntax arrays assocs combinators -combinators.short-circuit compiler.units fry interval-maps io -io.encodings.ascii io.files kernel literals locals make math -math.parser math.ranges memoize namespaces parser sequences -sets simple-flat-file splitting unicode.categories -unicode.categories.syntax unicode.data unicode.normalize +USING: accessors arrays assocs combinators fry interval-maps +kernel literals locals math namespaces parser sequences +simple-flat-file unicode.categories unicode.data unicode.normalize.private words words.constant ; IN: unicode.breaks @@ -105,38 +102,6 @@ define-constant : grapheme-break? ( class1 class2 -- ? ) grapheme-table nth nth not ; -PRIVATE> - -: first-grapheme ( str -- i ) - unclip-slice grapheme-class over - [ grapheme-class [ nip ] [ grapheme-break? ] 2bi ] find drop - nip swap length or 1 + ; - -: first-grapheme-from ( start str -- i ) - over tail-slice first-grapheme + ; - -: last-grapheme ( str -- i ) - unclip-last-slice grapheme-class swap - [ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ; - -: last-grapheme-from ( end str -- i ) - swap head-slice last-grapheme ; - -pieces ( str quot: ( str -- i ) -- graphemes ) - [ dup empty? not ] swap '[ dup @ cut-slice swap ] produce nip ; inline - -PRIVATE> - -: >graphemes ( str -- graphemes ) - [ first-grapheme ] >pieces ; - -: string-reverse ( str -- rts ) - >graphemes reverse! concat ; - - - - : first-word ( str -- i ) - [ [ length ] [ first word-break-prop ] bi ] keep - 1 swap dup '[ _ word-break-next ] find-index-from - drop nip swap or ; - -: >words ( str -- words ) - [ first-word ] >pieces ; - - - -: word-break-at? ( i str -- ? ) - { - [ drop zero? ] - [ length = ] - [ - [ nth-next [ word-break-prop ] dip ] 2keep - word-break-next nip - ] - } 2|| ; - -: first-word-from ( start str -- i ) - over tail-slice first-word + ; - -: last-word ( str -- i ) - [ length iota ] keep '[ _ word-break-at? ] find-last drop 0 or ; - -: last-word-from ( end str -- i ) - swap head-slice last-word ; diff --git a/basis/unicode/case/case-docs.factor b/basis/unicode/case/case-docs.factor index 845eadc84a..e1ea0757eb 100644 --- a/basis/unicode/case/case-docs.factor +++ b/basis/unicode/case/case-docs.factor @@ -1,4 +1,4 @@ -USING: help.syntax help.markup kernel strings ; +USING: help.syntax help.markup kernel strings unicode ; IN: unicode.case ABOUT: "unicode.case" diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor index c8ed8b2d0f..011913097d 100644 --- a/basis/unicode/case/case.factor +++ b/basis/unicode/case/case.factor @@ -1,9 +1,7 @@ ! Copyright (C) 2008, 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: unicode.data sequences namespaces -sbufs make unicode.normalize math hints -unicode.categories combinators assocs combinators.short-circuit -strings splitting kernel accessors unicode.breaks fry locals ; +USING: combinators.short-circuit kernel locals namespaces sbufs +sequences splitting unicode.categories unicode.data ; QUALIFIED: ascii IN: unicode.case @@ -52,10 +50,13 @@ SYMBOL: locale ! Just casing locale, or overall? [ 1 head* CHAR: greek-small-letter-final-sigma suffix ] when ] if-empty ; inline +! this duplicate unicode to prevent dependencies +CATEGORY-NOT: (uncased) Lu Ll Lt Lm Mn Me ; + : sigma-map ( string -- string ) { CHAR: greek-capital-letter-sigma } split [ [ [ { CHAR: greek-small-letter-sigma } ] [ - dup first uncased? + dup first (uncased)? CHAR: greek-small-letter-final-sigma CHAR: greek-small-letter-sigma ? prefix ] if-empty @@ -85,43 +86,3 @@ SYMBOL: locale ! Just casing locale, or overall? [ lithuanian? [ lithuanian>upper ] when ] bi ; PRIVATE> - -: >lower ( string -- lower ) - locale>lower final-sigma - [ lower>> ] [ ch>lower ] map-case ; - -HINTS: >lower string ; - -: >upper ( string -- upper ) - locale>upper - [ upper>> ] [ ch>upper ] map-case ; - -HINTS: >upper string ; - -title) ( string -- title ) - locale>upper - [ title>> ] [ ch>title ] map-case ; inline - -PRIVATE> - -: capitalize ( string -- title ) - unclip-slice 1string [ >lower ] [ (>title) ] bi* - "" prepend-as ; inline - -: >title ( string -- title ) - final-sigma >words [ capitalize ] map! concat ; - -HINTS: >title string ; - -: >case-fold ( string -- fold ) - >upper >lower ; - -: lower? ( string -- ? ) dup >lower = ; - -: upper? ( string -- ? ) dup >upper = ; - -: title? ( string -- ? ) dup >title = ; - -: case-fold? ( string -- ? ) dup >case-fold = ; diff --git a/basis/unicode/categories/categories-docs.factor b/basis/unicode/categories/categories-docs.factor index 47e3f6f541..9aae8f069e 100644 --- a/basis/unicode/categories/categories-docs.factor +++ b/basis/unicode/categories/categories-docs.factor @@ -1,66 +1,21 @@ -! Copyright (C) 2009 Daniel Ehrenberg +! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: help.markup help.syntax kernel ; +USING: help.syntax help.markup ; IN: unicode.categories -HELP: LETTER -{ $class-description "The class of upper cased letters." } ; - -HELP: Letter -{ $class-description "The class of letters." } ; - -HELP: alpha -{ $class-description "The class of alphanumeric characters." } ; - -HELP: math -{ $class-description "The class of Unicode math characters." } ; - -HELP: blank -{ $class-description "The class of whitespace characters." } ; - -HELP: character -{ $class-description "The class of pre-defined Unicode code points." } ; - -HELP: control -{ $class-description "The class of control characters." } ; - -HELP: digit -{ $class-description "The class of digits." } ; - -HELP: letter -{ $class-description "The class of lower-cased letters." } ; - -HELP: printable -{ $class-description "The class of characters which are printable, as opposed to being control or formatting characters." } ; - -HELP: uncased -{ $class-description "The class of letters which don't have a case." } ; +ABOUT: "unicode.categories" -ARTICLE: "unicode.categories" "Character classes" -"The " { $vocab-link "unicode.categories" } " vocabulary implements predicates for determining if a code point has a particular property, for example being a lower cased letter. These should be used in preference to the " { $vocab-link "ascii" } " equivalents in most cases. Each character class has an associated predicate word." +ARTICLE: "unicode.categories" "Unicode category syntax" +"There is special syntax sugar for making predicate classes which are unions of Unicode general categories, plus some other code." { $subsections - blank - blank? - letter - letter? - LETTER - LETTER? - Letter - Letter? - digit - digit? - printable - printable? - alpha - alpha? - control - control? - uncased - uncased? - character - character? - math - math? + POSTPONE: CATEGORY: + POSTPONE: CATEGORY-NOT: } ; -ABOUT: "unicode.categories" +HELP: CATEGORY: +{ $syntax "CATEGORY: foo Nl Pd Lu | \"Diacritic\" property? ;" } +{ $description "This defines a predicate class which is a subset of code points. In this example, " { $snippet "foo" } " is the class of characters which are in the general category Nl or Pd or Lu, or which have the Diacritic property." } ; + +HELP: CATEGORY-NOT: +{ $syntax "CATEGORY-NOT: foo Nl Pd Lu | \"Diacritic\" property? ;" } +{ $description "This defines a predicate class which is a subset of code points, the complement of what " { $link POSTPONE: CATEGORY: } " would define. In this example, " { $snippet "foo" } " is the class of characters which are neither in the general category Nl or Pd or Lu, nor have the Diacritic property." } ; diff --git a/basis/unicode/categories/categories-tests.factor b/basis/unicode/categories/categories-tests.factor index 52ade75944..536c0e947c 100644 --- a/basis/unicode/categories/categories-tests.factor +++ b/basis/unicode/categories/categories-tests.factor @@ -1,19 +1,2 @@ -! Copyright (C) 2008 Daniel Ehrenberg. +! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: tools.test kernel unicode.categories words sequences unicode.data ; -IN: unicode.categories.tests - -{ { f f t t f t t f f t } } [ CHAR: A { - blank? letter? LETTER? Letter? digit? - printable? alpha? control? uncased? character? -} [ execute ] with map ] unit-test -{ "Nd" } [ CHAR: 3 category ] unit-test -{ "Lo" } [ 0x3400 category ] unit-test -{ "Lo" } [ 0x3450 category ] unit-test -{ "Lo" } [ 0x4DB5 category ] unit-test -{ "Cs" } [ 0xDD00 category ] unit-test -{ t } [ CHAR: \t blank? ] unit-test -{ t } [ CHAR: \s blank? ] unit-test -{ t } [ CHAR: \r blank? ] unit-test -{ t } [ CHAR: \n blank? ] unit-test -{ f } [ CHAR: a blank? ] unit-test diff --git a/basis/unicode/categories/categories.factor b/basis/unicode/categories/categories.factor index 1e6a7b3a99..291e2d6499 100644 --- a/basis/unicode/categories/categories.factor +++ b/basis/unicode/categories/categories.factor @@ -1,16 +1,34 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: unicode.categories.syntax sequences unicode.data ; +USING: accessors assocs classes.parser classes.predicate fry +kernel math parser sequences splitting unicode.data +unicode.data.private ; IN: unicode.categories -CATEGORY: blank Zs Zl Zp | "\r\n\t" member? ; -CATEGORY: letter Ll | "Other_Lowercase" property? ; -CATEGORY: LETTER Lu | "Other_Uppercase" property? ; -CATEGORY: Letter Lu Ll Lt Lm Lo Nl ; -CATEGORY: digit Nd Nl No ; -CATEGORY-NOT: printable Cc Cf Cs Co Cn ; -CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ; -CATEGORY: control Cc ; -CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ; -CATEGORY-NOT: character Cn ; -CATEGORY: math Sm | "Other_Math" property? ; +! For use in CATEGORY: +SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co | ; + +fixnum-strict dup category# _ member? [ drop t ] _ if ] ; + +: integer-predicate-class ( word predicate -- ) + integer swap define-predicate-class ; + +: define-category ( word categories code -- ) + [category] integer-predicate-class ; + +: define-not-category ( word categories code -- ) + [category] [ not ] compose integer-predicate-class ; + +: parse-category ( -- word tokens quot ) + scan-new-class \ ; parse-until { | } split1 + [ [ name>> categories-map at ] B{ } map-as ] + [ [ [ ] like ] [ [ drop f ] ] if* ] bi* ; + +PRIVATE> + +SYNTAX: CATEGORY: parse-category define-category ; + +SYNTAX: CATEGORY-NOT: parse-category define-not-category ; diff --git a/basis/unicode/categories/summary.txt b/basis/unicode/categories/summary.txt index 7efad41e84..651d51c34c 100644 --- a/basis/unicode/categories/summary.txt +++ b/basis/unicode/categories/summary.txt @@ -1 +1 @@ -Unicode character categories +Parsing words used by Unicode implementation diff --git a/basis/unicode/categories/syntax/authors.txt b/basis/unicode/categories/syntax/authors.txt deleted file mode 100644 index f990dd0ed2..0000000000 --- a/basis/unicode/categories/syntax/authors.txt +++ /dev/null @@ -1 +0,0 @@ -Daniel Ehrenberg diff --git a/basis/unicode/categories/syntax/summary.txt b/basis/unicode/categories/syntax/summary.txt deleted file mode 100644 index 651d51c34c..0000000000 --- a/basis/unicode/categories/syntax/summary.txt +++ /dev/null @@ -1 +0,0 @@ -Parsing words used by Unicode implementation diff --git a/basis/unicode/categories/syntax/syntax-docs.factor b/basis/unicode/categories/syntax/syntax-docs.factor deleted file mode 100644 index 89c8d3554c..0000000000 --- a/basis/unicode/categories/syntax/syntax-docs.factor +++ /dev/null @@ -1,21 +0,0 @@ -! Copyright (C) 2008 Daniel Ehrenberg. -! See http://factorcode.org/license.txt for BSD license. -USING: help.syntax help.markup ; -IN: unicode.categories.syntax - -ABOUT: "unicode.categories.syntax" - -ARTICLE: "unicode.categories.syntax" "Unicode category syntax" -"There is special syntax sugar for making predicate classes which are unions of Unicode general categories, plus some other code." -{ $subsections - POSTPONE: CATEGORY: - POSTPONE: CATEGORY-NOT: -} ; - -HELP: CATEGORY: -{ $syntax "CATEGORY: foo Nl Pd Lu | \"Diacritic\" property? ;" } -{ $description "This defines a predicate class which is a subset of code points. In this example, " { $snippet "foo" } " is the class of characters which are in the general category Nl or Pd or Lu, or which have the Diacritic property." } ; - -HELP: CATEGORY-NOT: -{ $syntax "CATEGORY-NOT: foo Nl Pd Lu | \"Diacritic\" property? ;" } -{ $description "This defines a predicate class which is a subset of code points, the complement of what " { $link POSTPONE: CATEGORY: } " would define. In this example, " { $snippet "foo" } " is the class of characters which are neither in the general category Nl or Pd or Lu, nor have the Diacritic property." } ; diff --git a/basis/unicode/categories/syntax/syntax-tests.factor b/basis/unicode/categories/syntax/syntax-tests.factor deleted file mode 100644 index 536c0e947c..0000000000 --- a/basis/unicode/categories/syntax/syntax-tests.factor +++ /dev/null @@ -1,2 +0,0 @@ -! Copyright (C) 2009 Daniel Ehrenberg. -! See http://factorcode.org/license.txt for BSD license. diff --git a/basis/unicode/categories/syntax/syntax.factor b/basis/unicode/categories/syntax/syntax.factor deleted file mode 100644 index acb3fbd9bb..0000000000 --- a/basis/unicode/categories/syntax/syntax.factor +++ /dev/null @@ -1,34 +0,0 @@ -! Copyright (C) 2008, 2009 Daniel Ehrenberg. -! See http://factorcode.org/license.txt for BSD license. -USING: unicode.data kernel math sequences parser unicode.data.private -bit-arrays namespaces sequences.private arrays classes.parser -assocs classes.predicate sets fry splitting accessors ; -IN: unicode.categories.syntax - -! For use in CATEGORY: -SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co | ; - -fixnum-strict dup category# _ member? [ drop t ] _ if ] ; - -: integer-predicate-class ( word predicate -- ) - integer swap define-predicate-class ; - -: define-category ( word categories code -- ) - [category] integer-predicate-class ; - -: define-not-category ( word categories code -- ) - [category] [ not ] compose integer-predicate-class ; - -: parse-category ( -- word tokens quot ) - scan-new-class \ ; parse-until { | } split1 - [ [ name>> categories-map at ] B{ } map-as ] - [ [ [ ] like ] [ [ drop f ] ] if* ] bi* ; - -PRIVATE> - -SYNTAX: CATEGORY: parse-category define-category ; - -SYNTAX: CATEGORY-NOT: parse-category define-not-category ; diff --git a/basis/unicode/categories/syntax/tags.txt b/basis/unicode/categories/syntax/tags.txt deleted file mode 100644 index 8e27be7d61..0000000000 --- a/basis/unicode/categories/syntax/tags.txt +++ /dev/null @@ -1 +0,0 @@ -text diff --git a/basis/unicode/collation/collation-docs.factor b/basis/unicode/collation/collation-docs.factor index 33b6f2f72f..79379d57a9 100644 --- a/basis/unicode/collation/collation-docs.factor +++ b/basis/unicode/collation/collation-docs.factor @@ -1,5 +1,5 @@ USING: byte-arrays help.syntax help.markup kernel math.order -strings ; +strings unicode ; IN: unicode.collation ARTICLE: "unicode.collation" "Collation and weak comparison" diff --git a/basis/unicode/collation/collation.factor b/basis/unicode/collation/collation.factor index 02d813afce..be82f697cd 100644 --- a/basis/unicode/collation/collation.factor +++ b/basis/unicode/collation/collation.factor @@ -1,13 +1,13 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: sequences io.files io.encodings.ascii kernel splitting -accessors math.parser ascii io assocs strings math namespaces make -sorting combinators math.order arrays unicode.normalize unicode.data -locals macros sequences.deep words unicode.breaks quotations -combinators.short-circuit simple-flat-file ; +USING: accessors arrays assocs combinators +combinators.short-circuit kernel locals make math math.order +math.parser namespaces sequences simple-flat-file splitting +strings unicode.data ; IN: unicode.collation : completely-ignorable? ( weight -- ? ) @@ -124,36 +125,3 @@ PRIVATE> [ swap ignorable?>> or ] [ swap completely-ignorable? or not ] 2bi ] filter nip ; - -: collation-key ( string -- key ) - nfd string>graphemes graphemes>weights - filter-ignorable weights>bytes ; - - - -: primary= ( str1 str2 -- ? ) - 3 insensitive= ; - -: secondary= ( str1 str2 -- ? ) - 2 insensitive= ; - -: tertiary= ( str1 str2 -- ? ) - 1 insensitive= ; - -: quaternary= ( str1 str2 -- ? ) - 0 insensitive= ; - -: w/collation-key ( str -- {str,key} ) - [ collation-key ] keep 2array ; - -: sort-strings ( strings -- sorted ) - [ w/collation-key ] map natural-sort values ; - -: string<=> ( str1 str2 -- <=> ) - [ w/collation-key ] compare ; diff --git a/basis/unicode/data/data-docs.factor b/basis/unicode/data/data-docs.factor index edfa81d0a3..eb7e519f73 100644 --- a/basis/unicode/data/data-docs.factor +++ b/basis/unicode/data/data-docs.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. -USING: help.syntax help.markup kernel math strings ; +USING: help.syntax help.markup kernel math strings unicode ; IN: unicode.data ABOUT: "unicode.data" diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor index ebc3f6d9ef..0a759c625e 100644 --- a/basis/unicode/data/data.factor +++ b/basis/unicode/data/data.factor @@ -28,17 +28,12 @@ PRIVATE> CONSTANT: name-map H{ } : canonical-entry ( char -- seq ) canonical-map at ; inline -: combine-chars ( a b -- char/f ) >2ch combine-map at ; inline : compatibility-entry ( char -- seq ) compatibility-map at ; inline +: combine-chars ( a b -- char/f ) >2ch combine-map at ; inline : combining-class ( char -- n ) class-map at ; inline : non-starter? ( char -- ? ) combining-class { 0 f } member? not ; inline -: name>char ( name -- char ) name-map at ; inline -: char>name ( char -- name ) name-map value-at ; inline : property ( property -- interval-map ) properties at ; foldable : property? ( char property -- ? ) property interval-sets:in? ; inline -: ch>lower ( ch -- lower ) simple-lower ?at drop ; inline -: ch>upper ( ch -- upper ) simple-upper ?at drop ; inline -: ch>title ( ch -- title ) simple-title ?at drop ; inline : special-case ( ch -- casing-tuple ) special-casing at ; inline ! For non-existent characters, use Cn @@ -143,7 +138,7 @@ PRIVATE> name-map sort-values keys [ { [ "first>" tail? ] [ "last>" tail? ] } 1|| ] filter 2 group [ - [ name>char ] bi@ [ [a,b] ] [ table ?nth ] bi + [ name-map at ] bi@ [ [a,b] ] [ table ?nth ] bi [ swap table ?set-nth ] curry each ] assoc-each table ; @@ -208,14 +203,15 @@ load-data { } cleave combine-map keys [ 2ch> nip ] map -[ combining-class ] reject +[ class-map at ] reject [ 0 swap class-map set-at ] each load-special-casing special-casing swap assoc-union! drop load-properties properties swap assoc-union! drop -[ name>char [ "Invalid character" throw ] unless* ] -name>char-hook set-global - PRIVATE> + +[ + name-map at [ "Invalid character" throw ] unless* +] name>char-hook set-global diff --git a/basis/unicode/normalize/normalize-docs.factor b/basis/unicode/normalize/normalize-docs.factor index 58f381446e..fb71b614b2 100644 --- a/basis/unicode/normalize/normalize-docs.factor +++ b/basis/unicode/normalize/normalize-docs.factor @@ -1,4 +1,4 @@ -USING: help.syntax help.markup strings ; +USING: help.syntax help.markup strings unicode ; IN: unicode.normalize ABOUT: "unicode.normalize" diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor index ee9e1dcad3..5c9065dc91 100644 --- a/basis/unicode/normalize/normalize.factor +++ b/basis/unicode/normalize/normalize.factor @@ -89,23 +89,6 @@ HINTS: (nfd) string ; HINTS: (nfkd) string ; -PRIVATE> - -: nfd ( string -- nfd ) - [ (nfd) ] with-string ; - -: nfkd ( string -- nfkd ) - [ (nfkd) ] with-string ; - -: string-append ( s1 s2 -- string ) - [ append ] keep - 0 over ?nth non-starter? - [ length dupd reorder-back ] [ drop ] if ; - -HINTS: string-append string string ; - - - -: nfc ( string -- nfc ) - [ (nfd) combine ] with-string ; - -: nfkc ( string -- nfkc ) - [ (nfkd) combine ] with-string ; diff --git a/basis/unicode/script/script-docs.factor b/basis/unicode/script/script-docs.factor index ed255cc803..5bdffbb233 100644 --- a/basis/unicode/script/script-docs.factor +++ b/basis/unicode/script/script-docs.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. -USING: help.syntax help.markup strings ; +USING: help.syntax help.markup strings unicode ; IN: unicode.script ABOUT: "unicode.script" diff --git a/basis/unicode/script/script.factor b/basis/unicode/script/script.factor index 278199e63b..7d9a0f0bd7 100644 --- a/basis/unicode/script/script.factor +++ b/basis/unicode/script/script.factor @@ -4,15 +4,8 @@ USING: interval-maps namespaces parser simple-flat-file words.constant ; IN: unicode.script -> - -PRIVATE> - -: script-of ( char -- script ) - script-table interval-at ; diff --git a/basis/unicode/unicode-docs.factor b/basis/unicode/unicode-docs.factor index 24cf64f9e0..9d49fc9dbb 100644 --- a/basis/unicode/unicode-docs.factor +++ b/basis/unicode/unicode-docs.factor @@ -15,7 +15,7 @@ $nl { $vocab-subsection "Word and grapheme breaks" "unicode.breaks" } { $vocab-subsection "Unicode normalization" "unicode.normalize" } "The following are mostly for internal use:" -{ $vocab-subsection "Unicode category syntax" "unicode.categories.syntax" } +{ $vocab-subsection "Unicode category syntax" "unicode.categories" } { $vocab-subsection "Unicode data tables" "unicode.data" } { $see-also "ascii" "io.encodings" } ; diff --git a/basis/unicode/unicode.factor b/basis/unicode/unicode.factor index 32adb961d4..97d1f20e26 100644 --- a/basis/unicode/unicode.factor +++ b/basis/unicode/unicode.factor @@ -1 +1,200 @@ + +USING: accessors arrays assocs combinators.short-circuit fry +hints interval-maps kernel math math.order sequences sorting +strings unicode.breaks.private unicode.case.private +unicode.categories unicode.collation unicode.collation.private +unicode.data unicode.data.private unicode.normalize.private +unicode.script ; + IN: unicode + +CATEGORY: blank Zs Zl Zp | "\r\n\t" member? ; + +CATEGORY: letter Ll | "Other_Lowercase" property? ; + +CATEGORY: LETTER Lu | "Other_Uppercase" property? ; + +CATEGORY: Letter Lu Ll Lt Lm Lo Nl ; + +CATEGORY: digit Nd Nl No ; + +CATEGORY-NOT: printable Cc Cf Cs Co Cn ; + +CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ; + +CATEGORY: control Cc ; + +CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ; + +CATEGORY-NOT: character Cn ; + +CATEGORY: math Sm | "Other_Math" property? ; + +: script-of ( char -- script ) + script-table interval-at ; + +: name>char ( name -- char ) name-map at ; inline + +: char>name ( char -- name ) name-map value-at ; inline + +: ch>lower ( ch -- lower ) simple-lower ?at drop ; inline + +: ch>upper ( ch -- upper ) simple-upper ?at drop ; inline + +: ch>title ( ch -- title ) simple-title ?at drop ; inline + +: first-grapheme ( str -- i ) + unclip-slice grapheme-class over + [ grapheme-class [ nip ] [ grapheme-break? ] 2bi ] find drop + nip swap length or 1 + ; + +: first-grapheme-from ( start str -- i ) + over tail-slice first-grapheme + ; + +: last-grapheme ( str -- i ) + unclip-last-slice grapheme-class swap + [ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ; + +: last-grapheme-from ( end str -- i ) + swap head-slice last-grapheme ; + +pieces ( str quot: ( str -- i ) -- graphemes ) + [ dup empty? not ] swap '[ dup @ cut-slice swap ] produce nip ; inline + +PRIVATE> + +: >graphemes ( str -- graphemes ) + [ first-grapheme ] >pieces ; + +: string-reverse ( str -- rts ) + >graphemes reverse! concat ; + +: first-word ( str -- i ) + [ [ length ] [ first word-break-prop ] bi ] keep + 1 swap dup '[ _ word-break-next ] find-index-from + drop nip swap or ; + +: >words ( str -- words ) + [ first-word ] >pieces ; + + + +: word-break-at? ( i str -- ? ) + { + [ drop zero? ] + [ length = ] + [ + [ nth-next [ word-break-prop ] dip ] 2keep + word-break-next nip + ] + } 2|| ; + +: first-word-from ( start str -- i ) + over tail-slice first-word + ; + +: last-word ( str -- i ) + [ length iota ] keep '[ _ word-break-at? ] find-last drop 0 or ; + +: last-word-from ( end str -- i ) + swap head-slice last-word ; + +: >lower ( string -- lower ) + locale>lower final-sigma + [ lower>> ] [ ch>lower ] map-case ; + +HINTS: >lower string ; + +: >upper ( string -- upper ) + locale>upper + [ upper>> ] [ ch>upper ] map-case ; + +HINTS: >upper string ; + +title) ( string -- title ) + locale>upper + [ title>> ] [ ch>title ] map-case ; inline + +PRIVATE> + +: capitalize ( string -- title ) + unclip-slice 1string [ >lower ] [ (>title) ] bi* + "" prepend-as ; inline + +: >title ( string -- title ) + final-sigma >words [ capitalize ] map! concat ; + +HINTS: >title string ; + +: >case-fold ( string -- fold ) + >upper >lower ; + +: lower? ( string -- ? ) dup >lower = ; + +: upper? ( string -- ? ) dup >upper = ; + +: title? ( string -- ? ) dup >title = ; + +: case-fold? ( string -- ? ) dup >case-fold = ; + +: nfd ( string -- nfd ) + [ (nfd) ] with-string ; + +: nfkd ( string -- nfkd ) + [ (nfkd) ] with-string ; + +: string-append ( s1 s2 -- string ) + [ append ] keep + 0 over ?nth non-starter? + [ length dupd reorder-back ] [ drop ] if ; + +HINTS: string-append string string ; + +: nfc ( string -- nfc ) + [ (nfd) combine ] with-string ; + +: nfkc ( string -- nfkc ) + [ (nfkd) combine ] with-string ; + +: collation-key ( string -- key ) + nfd string>graphemes graphemes>weights + filter-ignorable weights>bytes ; + + + +: primary= ( str1 str2 -- ? ) + 3 insensitive= ; + +: secondary= ( str1 str2 -- ? ) + 2 insensitive= ; + +: tertiary= ( str1 str2 -- ? ) + 1 insensitive= ; + +: quaternary= ( str1 str2 -- ? ) + 0 insensitive= ; + +: w/collation-key ( str -- {str,key} ) + [ collation-key ] keep 2array ; + +: sort-strings ( strings -- sorted ) + [ w/collation-key ] map natural-sort values ; + +: string<=> ( str1 str2 -- <=> ) + [ w/collation-key ] compare ; diff --git a/basis/uuid/uuid.factor b/basis/uuid/uuid.factor index 35338bfd22..21b60feffc 100644 --- a/basis/uuid/uuid.factor +++ b/basis/uuid/uuid.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license USING: byte-arrays calendar checksums checksums.md5 checksums.sha io.binary kernel math math.parser math.ranges -random sequences strings system unicode.case ; +random sequences strings system unicode ; IN: uuid gml ; IN: gml.ui diff --git a/extra/graphviz/render/render.factor b/extra/graphviz/render/render.factor index 09157cd613..e294f37645 100644 --- a/extra/graphviz/render/render.factor +++ b/extra/graphviz/render/render.factor @@ -5,7 +5,7 @@ graphviz.dot images.viewer io.backend io.directories io.encodings.8-bit.latin1 io.encodings.utf8 io.files io.files.temp io.files.unique io.launcher io.standard-paths kernel locals make namespaces sequences summary system threads -unicode.case vocabs webbrowser words ; +unicode vocabs webbrowser words ; IN: graphviz.render uchar ; IN: id3 diff --git a/extra/managed-server/chat/chat.factor b/extra/managed-server/chat/chat.factor index 258886c1f5..730a819e98 100644 --- a/extra/managed-server/chat/chat.factor +++ b/extra/managed-server/chat/chat.factor @@ -3,7 +3,7 @@ USING: accessors assocs calendar calendar.format combinators.smart io io.crlf io.encodings.utf8 kernel locals managed-server namespaces sequences sorting splitting -unicode.case ; +unicode ; IN: managed-server.chat TUPLE: chat-server < managed-server ; diff --git a/extra/parser-combinators/parser-combinators-tests.factor b/extra/parser-combinators/parser-combinators-tests.factor index cf6a731f53..fac52bab71 100644 --- a/extra/parser-combinators/parser-combinators-tests.factor +++ b/extra/parser-combinators/parser-combinators-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2005 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel lists lists.lazy tools.test strings math -sequences parser-combinators arrays math.parser unicode.categories ; +sequences parser-combinators arrays math.parser unicode ; IN: parser-combinators.tests ! Testing <&> diff --git a/extra/parser-combinators/parser-combinators.factor b/extra/parser-combinators/parser-combinators.factor index 04d8efdc79..39db45eb4a 100644 --- a/extra/parser-combinators/parser-combinators.factor +++ b/extra/parser-combinators/parser-combinators.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: lists lists.lazy promises kernel sequences strings math arrays splitting quotations combinators namespaces locals -unicode.case unicode.categories sequences.deep accessors ; +unicode sequences.deep accessors ; IN: parser-combinators ! Parser combinator protocol diff --git a/extra/parser-combinators/simple/simple.factor b/extra/parser-combinators/simple/simple.factor index fa5705679f..e147620b34 100644 --- a/extra/parser-combinators/simple/simple.factor +++ b/extra/parser-combinators/simple/simple.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2006 Chris Double. ! See http://factorcode.org/license.txt for BSD license. USING: kernel strings math sequences lists.lazy words -math.parser promises parser-combinators unicode.categories ; +math.parser promises parser-combinators unicode ; IN: parser-combinators.simple : digit-parser ( -- parser ) diff --git a/extra/pdf/wrap/wrap.factor b/extra/pdf/wrap/wrap.factor index 0b137212a3..c3b03eb823 100644 --- a/extra/pdf/wrap/wrap.factor +++ b/extra/pdf/wrap/wrap.factor @@ -1,8 +1,7 @@ ! Copyright (C) 2011-2012 John Benediktsson ! See http://factorcode.org/license.txt for BSD license -USING: kernel fry make math sequences ui.text unicode.categories -wrap ; +USING: kernel fry make math sequences ui.text unicode wrap ; IN: pdf.wrap diff --git a/extra/project-euler/common/common.factor b/extra/project-euler/common/common.factor index cf82e557d1..4b4548e819 100644 --- a/extra/project-euler/common/common.factor +++ b/extra/project-euler/common/common.factor @@ -5,7 +5,7 @@ USING: accessors arrays byte-arrays fry hints kernel lists make math math.functions math.matrices math.order math.parser math.primes.factors math.primes.lists math.primes.miller-rabin math.ranges math.ratios math.vectors namespaces parser prettyprint quotations sequences sorting - strings unicode.case vocabs vocabs.parser words ; + strings unicode vocabs vocabs.parser words ; IN: project-euler.common ! A collection of words used by more than one Project Euler solution diff --git a/extra/python/syntax/syntax-tests.factor b/extra/python/syntax/syntax-tests.factor index 1c5c8cd246..e3c553a127 100644 --- a/extra/python/syntax/syntax-tests.factor +++ b/extra/python/syntax/syntax-tests.factor @@ -3,7 +3,7 @@ fry io.files.temp kernel math namespaces python python.ffi python.modules.__builtin__ python.modules.argparse python.modules.datetime python.modules.os python.modules.os.path python.modules.sys python.modules.time python.objects python.syntax sets splitting tools.test -unicode.categories ; +unicode ; QUALIFIED-WITH: sequences s IN: python.syntax.tests diff --git a/extra/resolv-conf/resolv-conf.factor b/extra/resolv-conf/resolv-conf.factor index b6eefd2147..f5c098c8a8 100644 --- a/extra/resolv-conf/resolv-conf.factor +++ b/extra/resolv-conf/resolv-conf.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors combinators constructors io.encodings.utf8 io.files kernel math math.parser sequences splitting -unicode.categories ; +unicode ; IN: resolv-conf TUPLE: network ip netmask ; diff --git a/extra/robots/robots.factor b/extra/robots/robots.factor index 6802fe0eda..33ac216f39 100644 --- a/extra/robots/robots.factor +++ b/extra/robots/robots.factor @@ -4,7 +4,7 @@ USING: accessors arrays assocs calendar.format combinators combinators.short-circuit fry globs http.client kernel make math.parser multiline namespaces present regexp regexp.combinators sequences sets splitting splitting.monotonic -unicode.case unicode.categories urls ; +unicode urls ; IN: robots ! visit-time is GMT, request-rate is pages/second diff --git a/extra/rosetta-code/odd-word/odd-word.factor b/extra/rosetta-code/odd-word/odd-word.factor index 403ef9daf2..9ca3c3bd3a 100644 --- a/extra/rosetta-code/odd-word/odd-word.factor +++ b/extra/rosetta-code/odd-word/odd-word.factor @@ -1,6 +1,6 @@ ! Copyright (c) 2012 Anonymous ! See http://factorcode.org/license.txt for BSD license. -USING: continuations kernel io io.streams.string locals unicode.categories ; +USING: continuations kernel io io.streams.string locals unicode ; IN: rosetta-code.odd-word ! http://rosettacode.org/wiki/Odd_word_problem diff --git a/extra/rosetta-code/ordered-words/ordered-words.factor b/extra/rosetta-code/ordered-words/ordered-words.factor index e1067b14d4..2886d5190e 100644 --- a/extra/rosetta-code/ordered-words/ordered-words.factor +++ b/extra/rosetta-code/ordered-words/ordered-words.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: grouping http.client io io.encodings.utf8 io.files io.files.temp kernel math memoize sequences sequences.extras -unicode.case urls ; +unicode urls ; IN: rosetta-code.ordered-words ! http://rosettacode.org/wiki/Ordered_words diff --git a/extra/sequences/inserters/inserters-tests.factor b/extra/sequences/inserters/inserters-tests.factor index 3bc38ed7a0..be0378409e 100644 --- a/extra/sequences/inserters/inserters-tests.factor +++ b/extra/sequences/inserters/inserters-tests.factor @@ -1,6 +1,6 @@ ! (c)2010 Joe Groff bsd license USING: assocs kernel sequences sequences.inserters tools.test -unicode.case ; +unicode ; IN: sequences.inserters.tests { V{ 1 2 "Three" "Four" "Five" } } [ diff --git a/extra/talks/google-tech-talk/google-tech-talk.factor b/extra/talks/google-tech-talk/google-tech-talk.factor index 8e5771c5f2..89d19e94fa 100644 --- a/extra/talks/google-tech-talk/google-tech-talk.factor +++ b/extra/talks/google-tech-talk/google-tech-talk.factor @@ -45,7 +45,7 @@ CONSTANT: google-slides { $code "10 dup 0 < [ 1 - ] [ 1 + ] if ." } { $code "10 [ \"Hello Googlers!\" print ] times" } { $code - "USING: io.encodings.ascii unicode.case ;" + "USING: io.encodings.ascii unicode ;" "{ \"tomato\" \"orange\" \"banana\" }" "\"out.txt\" ascii [" " [ >upper print ] each" @@ -246,7 +246,7 @@ CONSTANT: google-slides } { $slide "Unicode strings" "Unicode-aware case conversion, char classes, collation, word breaks, and so on..." - { $code "USE: unicode.case" "\"ß\" >upper ." } + { $code "USE: unicode" "\"ß\" >upper ." } } { $slide "Unicode strings" "All external byte I/O is encoded/decoded" diff --git a/extra/tools/which/which.factor b/extra/tools/which/which.factor index ea93245ae2..4077002219 100644 --- a/extra/tools/which/which.factor +++ b/extra/tools/which/which.factor @@ -3,7 +3,7 @@ USING: arrays assocs combinators.short-circuit command-line environment io io.backend io.files io.files.info io.pathnames -kernel namespaces sequences sets splitting system unicode.case ; +kernel namespaces sequences sets splitting system unicode ; IN: tools.which diff --git a/extra/txon/txon.factor b/extra/txon/txon.factor index 2575d4bb61..e04c7c1a3d 100644 --- a/extra/txon/txon.factor +++ b/extra/txon/txon.factor @@ -3,7 +3,7 @@ USING: assocs combinators combinators.short-circuit formatting grouping hashtables io kernel make math math.parser sequences -splitting strings unicode.categories ; +splitting strings unicode ; IN: txon diff --git a/extra/webapps/help/help.factor b/extra/webapps/help/help.factor index aa7885099d..fd53988b93 100644 --- a/extra/webapps/help/help.factor +++ b/extra/webapps/help/help.factor @@ -4,7 +4,7 @@ USING: accessors assocs furnace.actions furnace.redirection help.html help.topics html.components html.forms http.server http.server.dispatchers http.server.static io.directories io.files.temp kernel locals namespaces sequences -unicode.categories urls ; +unicode urls ; IN: webapps.help TUPLE: help-webapp < dispatcher ; diff --git a/extra/zoneinfo/zoneinfo.factor b/extra/zoneinfo/zoneinfo.factor index 19368e32ab..08927bb31d 100644 --- a/extra/zoneinfo/zoneinfo.factor +++ b/extra/zoneinfo/zoneinfo.factor @@ -3,7 +3,7 @@ USING: accessors assocs combinators combinators.short-circuit combinators.smart fry io.encodings.utf8 io.files kernel math.parser math.statistics memoize namespaces sequences -splitting unicode.case calendar arrays ; +splitting unicode calendar arrays ; IN: zoneinfo CONSTANT: zoneinfo-paths diff --git a/unmaintained/dns/cache/nx/nx.factor b/unmaintained/dns/cache/nx/nx.factor index 9904f857ba..292e5f15b5 100644 --- a/unmaintained/dns/cache/nx/nx.factor +++ b/unmaintained/dns/cache/nx/nx.factor @@ -1,6 +1,6 @@ USING: kernel assocs locals combinators - math math.functions system unicode.case ; + math math.functions system unicode ; IN: dns.cache.nx diff --git a/unmaintained/dns/cache/rr/rr.factor b/unmaintained/dns/cache/rr/rr.factor index 2103c033fe..e2ce5239d1 100644 --- a/unmaintained/dns/cache/rr/rr.factor +++ b/unmaintained/dns/cache/rr/rr.factor @@ -1,6 +1,6 @@ USING: kernel sequences assocs sets locals combinators - accessors system math math.functions unicode.case prettyprint + accessors system math math.functions unicode prettyprint combinators.smart dns ; IN: dns.cache.rr diff --git a/unmaintained/dns/server/server.factor b/unmaintained/dns/server/server.factor index 53d118608c..072e6a9708 100644 --- a/unmaintained/dns/server/server.factor +++ b/unmaintained/dns/server/server.factor @@ -1,6 +1,6 @@ USING: kernel combinators sequences sets math threads namespaces continuations - debugger io io.sockets unicode.case accessors destructors + debugger io io.sockets unicode accessors destructors combinators.short-circuit combinators.smart fry arrays dns dns.util dns.misc ; diff --git a/unmaintained/irc-ui/ui.factor b/unmaintained/irc-ui/ui.factor index 62c45882ce..d65817ff68 100644 --- a/unmaintained/irc-ui/ui.factor +++ b/unmaintained/irc-ui/ui.factor @@ -3,7 +3,7 @@ USING: accessors kernel threads combinators concurrency.mailboxes sequences strings hashtables splitting fry assocs hashtables colors - sorting unicode.collation math.order + sorting unicode math.order ui ui.gadgets ui.gadgets.panes ui.gadgets.editors ui.gadgets.scrollers ui.commands ui.gadgets.frames ui.gestures ui.gadgets.tabs ui.gadgets.grids ui.gadgets.packs ui.gadgets.labels diff --git a/unmaintained/persistency/persistency.factor b/unmaintained/persistency/persistency.factor index 92ee7374f9..fd284e45c5 100644 --- a/unmaintained/persistency/persistency.factor +++ b/unmaintained/persistency/persistency.factor @@ -1,7 +1,7 @@ USING: accessors arrays byte-arrays calendar classes classes.tuple classes.tuple.parser combinators db db.queries db.tuples db.types kernel math nmake parser sequences strings -strings.parser unicode.case urls words ; +strings.parser unicode urls words ; IN: persistency TUPLE: persistent id ;