Privatizing unicode.case:ch>{lower,upper,title}

author Daniel Ehrenberg <littledan@Macintosh-103.local>

Fri, 9 Jan 2009 01:07:46 +0000 (19:07 -0600)

committer Daniel Ehrenberg <littledan@Macintosh-103.local>

Fri, 9 Jan 2009 01:07:46 +0000 (19:07 -0600)
author Daniel Ehrenberg <littledan@Macintosh-103.local>
Fri, 9 Jan 2009 01:07:46 +0000 (19:07 -0600)
committer Daniel Ehrenberg <littledan@Macintosh-103.local>
Fri, 9 Jan 2009 01:07:46 +0000 (19:07 -0600)
diff --git a/basis/ascii/ascii-docs.factor b/basis/ascii/ascii-docs.factor

index 6af697cf8935c09020d4a3846beb283eb2ea76bb..4c783e609cf98073bc6fb2e3d98303ca9bbda7c7 100644 (file)
--- a/basis/ascii/ascii-docs.factor
+++ b/basis/ascii/ascii-docs.factor
@@ -37,6 +37,26 @@ HELP: quotable?
  { $values { "ch" "a character" } { "?" "a boolean" } }\r
  { $description "Tests for characters which may appear in a Factor string literal without escaping." } ;\r
  \r
+HELP: ascii?\r
+{ $values { "ch" "a character" } { "?" "a boolean" } }\r
+{ $description "Tests for whether a number is an ASCII character." } ;\r
+\r
+HELP: ch>lower\r
+{ $values { "ch" "a character" } { "lower" "a character" } }\r
+{ $description "Converts an ASCII character to lower case." } ;\r
+\r
+HELP: ch>upper\r
+{ $values { "ch" "a character" } { "upper" "a character" } }\r
+{ $description "Converts an ASCII character to upper case." } ;\r
+\r
+HELP: >lower\r
+{ $values { "str" "a string" } { "lower" "a string" } }\r
+{ $description "Converts an ASCII string to lower case." } ;\r
+\r
+HELP: >upper\r
+{ $values { "str" "a string" } { "upper" "a string" } }\r
+{ $description "Converts an ASCII string to upper case." } ;\r
+\r
  ARTICLE: "ascii" "ASCII character classes"\r
  "The " { $vocab-link "ascii" } " vocabulary implements traditional ASCII character classes:"\r
  { $subsection blank? }\r
@@ -46,6 +66,12 @@ ARTICLE: "ascii" "ASCII character classes"
  { $subsection printable? }\r
  { $subsection control? }\r
  { $subsection quotable? }\r
-"Modern applications should use Unicode 5.0 instead (" { $vocab-link "unicode.categories" } ")." ;\r
+{ $subsection ascii? }\r
+"ASCII case conversion is also implemented:"\r
+{ $subsection ch>lower }\r
+{ $subsection ch>upper }\r
+{ $subsection >lower }\r
+{ $subsection >upper }\r
+"Modern applications should use Unicode 5.1 instead (" { $vocab-link "unicode.categories" } ")." ;\r
  \r
  ABOUT: "ascii"\r
diff --git a/basis/ascii/ascii-tests.factor b/basis/ascii/ascii-tests.factor

index 7dacce734b7562da14a0f8be48a076ac7c763faf..6f39b32a0110c906865162ff2ce1895e0479df18 100644 (file)
--- a/basis/ascii/ascii-tests.factor
+++ b/basis/ascii/ascii-tests.factor
@@ -12,3 +12,8 @@ IN: ascii.tests
      0 "There are Four Upper Case characters"
      [ LETTER? [ 1+ ] when ] each
  ] unit-test
+
+[ t f ] [ CHAR: \s ascii? 400 ascii? ] unit-test
+
+[ "HELLO HOW ARE YOU?" ] [ "hellO hOw arE YOU?" >upper ] unit-test
+[ "i'm good thx bai" ] [ "I'm Good THX bai" >lower ] unit-test
diff --git a/basis/ascii/ascii.factor b/basis/ascii/ascii.factor

index c009c66cde33a2f7b796679f1a83ab045f455ddf..a64a7b8eb549b9016535ed003183f7844fb87bcf 100644 (file)
--- a/basis/ascii/ascii.factor
+++ b/basis/ascii/ascii.factor
@@ -4,6 +4,8 @@ USING: kernel math math.order sequences
  combinators.short-circuit ;\r
  IN: ascii\r
  \r
+: ascii? ( ch -- ? ) 0 127 between? ; inline\r
+\r
  : blank? ( ch -- ? ) " \t\n\r" member? ; inline\r
  \r
  : letter? ( ch -- ? ) CHAR: a CHAR: z between? ; inline\r
@@ -25,3 +27,15 @@ IN: ascii
  \r
  : alpha? ( ch -- ? )\r
      [ [ Letter? ] [ digit? ] ] 1|| ;\r
+\r
+: ch>lower ( ch -- lower )\r
+    dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ;\r
+\r
+: >lower ( str -- lower )\r
+    [ ch>lower ] map ;\r
+\r
+: ch>upper ( ch -- upper )\r
+    dup CHAR: a CHAR: z between? [ HEX: 20 - ] when ;\r
+\r
+: >upper ( str -- upper )\r
+    [ ch>upper ] map ;\r
diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor

index 76206529487107df89bc84bd75d26c8fc480cd3e..dd116f3d7a807303f8b7d7f21ead4e14f4f0b337 100644 (file)
--- a/basis/regexp/nfa/nfa.factor
+++ b/basis/regexp/nfa/nfa.factor
@@ -3,7 +3,10 @@
  USING: accessors arrays assocs grouping kernel regexp.backend
  locals math namespaces regexp.parser sequences fry quotations
  math.order math.ranges vectors unicode.categories regexp.utils
-regexp.transition-tables words sets regexp.classes unicode.case ;
+regexp.transition-tables words sets regexp.classes unicode.case.private ;
+! NOTE: unicode.case.private is used here only for ch>upper and ch>lower.
+! Case-insensitive matching should instead be done by case-folding
+! everything before processing starts.
  IN: regexp.nfa
  
  SYMBOL: negation-mode
@@ -160,6 +163,8 @@ M: LETTER-class nfa-node ( node -- )
  
  M: character-class-range nfa-node ( node -- )
      case-insensitive option? [
+        ! This should be implemented for Unicode by case-folding
+        ! the input and all strings in the regexp.
          dup [ from>> ] [ to>> ] bi
          2dup [ Letter? ] bi@ and [
              rot drop
diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor

index 25509ec798b655c6b5ad311dba3664c81cbaa571..2f397538a065f257185488be0e2093614c4a4c2c 100644 (file)
--- a/basis/regexp/parser/parser.factor
+++ b/basis/regexp/parser/parser.factor
@@ -3,8 +3,8 @@
  USING: accessors arrays assocs combinators io io.streams.string
  kernel math math.parser namespaces sets
  quotations sequences splitting vectors math.order
-unicode.categories strings regexp.backend regexp.utils
-unicode.case words locals regexp.classes ;
+strings regexp.backend regexp.utils
+unicode.case unicode.categories words locals regexp.classes ;
  IN: regexp.parser
  
  FROM: math.ranges => [a,b] ;
@@ -261,7 +261,7 @@ ERROR: bad-escaped-literals seq ;
      parse-til-E
      drop1
      [ epsilon ] [
-        [ quot call <constant> ] V{ } map-as
+        quot call [ <constant> ] V{ } map-as
          first|concatenation
      ] if-empty ; inline
  
@@ -269,10 +269,10 @@ ERROR: bad-escaped-literals seq ;
      [ ] (parse-escaped-literals) ;
  
  : lower-case-literals ( -- obj )
-    [ ch>lower ] (parse-escaped-literals) ;
+    [ >lower ] (parse-escaped-literals) ;
  
  : upper-case-literals ( -- obj )
-    [ ch>upper ] (parse-escaped-literals) ;
+    [ >upper ] (parse-escaped-literals) ;
  
  : parse-escaped ( -- obj )
      read1
diff --git a/basis/soundex/soundex.factor b/basis/soundex/soundex.factor

index 416ec4a6bc4bad0110d5bae0705cb1c9203dc092..164f634185f3fd99609cec189196e5cf3680403a 100644 (file)
--- a/basis/soundex/soundex.factor
+++ b/basis/soundex/soundex.factor
@@ -1,6 +1,6 @@
  ! Copyright (C) 2008 Slava Pestov.
  ! See http://factorcode.org/license.txt for BSD license.
-USING: sequences grouping assocs kernel ascii unicode.case tr ;
+USING: sequences grouping assocs kernel ascii tr ;
  IN: soundex
  
  TR: soundex-tr
diff --git a/basis/tr/tr-tests.factor b/basis/tr/tr-tests.factor

index c168f5384d8c830381ef117285318c65d3ea4084..3434c28216366a5114b28d829a930875ab807b51 100644 (file)
--- a/basis/tr/tr-tests.factor
+++ b/basis/tr/tr-tests.factor
@@ -1,5 +1,5 @@
  IN: tr.tests
-USING: tr tools.test unicode.case ;
+USING: tr tools.test ascii ;
  
  TR: tr-test ch>upper "ABC" "XYZ" ;
  
diff --git a/basis/tr/tr.factor b/basis/tr/tr.factor

index 66d8df7d449a939e60b2ba2744154344df8cd1f9..ce535f335aa9e1eeb1b2b4ab67c6a9e67e3248f3 100644 (file)
--- a/basis/tr/tr.factor
+++ b/basis/tr/tr.factor
@@ -1,6 +1,6 @@
  ! Copyright (C) 2008 Slava Pestov.
  ! See http://factorcode.org/license.txt for BSD license.
-USING: byte-arrays strings sequences sequences.private
+USING: byte-arrays strings sequences sequences.private ascii
  fry kernel words parser lexer assocs math math.order summary ;
  IN: tr
  
@@ -11,8 +11,6 @@ M: bad-tr summary
  
  <PRIVATE
  
-: ascii? ( ch -- ? ) 0 127 between? ; inline
-
  : tr-nth ( n mapping -- ch ) nth-unsafe 127 bitand ; inline
  
  : check-tr ( from to -- )
diff --git a/basis/unicode/case/case-docs.factor b/basis/unicode/case/case-docs.factor

index da582c659a2178a7b48391f351f599cd464d1336..02da8e7635959edfd07cb1feaf3b375eb72bcef7 100644 (file)
--- a/basis/unicode/case/case-docs.factor
+++ b/basis/unicode/case/case-docs.factor
@@ -9,10 +9,6 @@ ARTICLE: "unicode.case" "Case mapping"
  { $subsection >lower }
  { $subsection >title }
  { $subsection >case-fold }
-"There are analogous routines which operate on individual code points, but these should " { $emphasis "not be used" } " in general as they have slightly different behavior. In some cases, for example, they do not perform the case operation, as a single code point must expand to more than one."
-{ $subsection ch>upper }
-{ $subsection ch>lower }
-{ $subsection ch>title }
  "To test if a string is in a given case:"
  { $subsection upper? }
  { $subsection lower? }
@@ -53,18 +49,3 @@ HELP: title?
  HELP: case-fold?
  { $values { "string" string } { "?" "a boolean" } }
  { $description "Tests if a string is in case-folded form." } ;
-
-HELP: ch>lower
-{ $values { "ch" "a code point" } { "lower" "a code point" } }
-{ $description "Converts a code point to lower case." }
-{ $warning "Don't use this unless you know what you're doing! " { $code ">lower" } " is not the same as " { $code "[ ch>lower ] map" } "." } ;
-
-HELP: ch>upper
-{ $values { "ch" "a code point" } { "upper" "a code point" } }
-{ $description "Converts a code point to upper case." }
-{ $warning "Don't use this unless you know what you're doing! " { $code ">upper" } " is not the same as " { $code "[ ch>upper ] map" } "." } ;
-
-HELP: ch>title
-{ $values { "ch" "a code point" } { "title" "a code point" } }
-{ $description "Converts a code point to title case." }
-{ $warning "Don't use this unless you know what you're doing! " { $code ">title" } " is not the same as " { $code "[ ch>title ] map" } "." } ;
diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor

index 99278cd72e73ef78a86b04d7213b5789c2c93fe9..c800205704f66811372d96b3381743726c8b0626 100644 (file)
--- a/basis/unicode/case/case.factor
+++ b/basis/unicode/case/case.factor
@@ -7,11 +7,11 @@ IN: unicode.case
  
  <PRIVATE
  : at-default ( key assoc -- value/key ) [ at ] [ drop ] 2bi or ;
-PRIVATE>
  
  : ch>lower ( ch -- lower ) simple-lower at-default ;
  : ch>upper ( ch -- upper ) simple-upper at-default ;
  : ch>title ( ch -- title ) simple-title at-default ;
+PRIVATE>
  
  SYMBOL: locale ! Just casing locale, or overall?
  
diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor

index 61a93d93759387186e3fed153262641bcb896e0c..6cf913bffa7290395b91c781157d977175fe555c 100644 (file)
--- a/basis/unicode/data/data.factor
+++ b/basis/unicode/data/data.factor
@@ -128,12 +128,9 @@ VALUE: properties
              cat categories index char table ?set-nth
          ] assoc-each table fill-ranges ] ;
  
-: ascii-lower ( string -- lower )
-    [ dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ] map ;
-
  : process-names ( data -- names-hash )
      1 swap (process-data) [
-        ascii-lower { { CHAR: \s CHAR: - } } substitute swap
+        >lower { { CHAR: \s CHAR: - } } substitute swap
      ] H{ } assoc-map-as ;
  
  : multihex ( hexstring -- string )
diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor

index c8d0eb3f7dc4debafebcd19671d2c8dbd5bfb796..2fbe2e1843120000f08298e00a5493665816d7e8 100644 (file)
--- a/basis/unicode/normalize/normalize.factor
+++ b/basis/unicode/normalize/normalize.factor
@@ -1,7 +1,8 @@
  ! Copyright (C) 2008 Daniel Ehrenberg.
  ! See http://factorcode.org/license.txt for BSD license.
  USING: sequences namespaces make unicode.data kernel math arrays
-locals sorting.insertion accessors assocs math.order combinators ;
+locals sorting.insertion accessors assocs math.order combinators
+unicode.syntax ;
  IN: unicode.normalize
  
  <PRIVATE
diff --git a/basis/xmode/marker/marker.factor b/basis/xmode/marker/marker.factor

index c37d60df147f6dbda49b0b0c719243657aa481c0..3e632cc5afc587765e8c8e17aba7fd234c197f9f 100644 (file)
--- a/basis/xmode/marker/marker.factor
+++ b/basis/xmode/marker/marker.factor
@@ -5,7 +5,7 @@ USING: kernel namespaces make xmode.rules xmode.tokens
  xmode.marker.state xmode.marker.context xmode.utilities
  xmode.catalog sequences math assocs combinators strings
  parser-combinators.regexp splitting parser-combinators ascii
-unicode.case combinators.short-circuit accessors ;
+combinators.short-circuit accessors ;
  
  ! Based on org.gjt.sp.jedit.syntax.TokenMarker
  
diff --git a/extra/benchmark/reverse-complement/reverse-complement.factor b/extra/benchmark/reverse-complement/reverse-complement.factor

index 3298706da305a6d62f20e68c75fa42fd359f5cc4..4147ffabdfa06657cd07c125cb56ac1ee1c111cc 100755 (executable)
--- a/extra/benchmark/reverse-complement/reverse-complement.factor
+++ b/extra/benchmark/reverse-complement/reverse-complement.factor
@@ -3,7 +3,7 @@
  USING: io io.files io.files.temp io.streams.duplex kernel
  sequences sequences.private strings vectors words memoize
  splitting grouping hints tr continuations io.encodings.ascii
-unicode.case ;
+ascii ;
  IN: benchmark.reverse-complement
  
  TR: trans-map ch>upper "ACGTUMRYKVHDB" "TGCAAKYRMBDHV" ;
diff --git a/extra/parser-combinators/regexp/regexp.factor b/extra/parser-combinators/regexp/regexp.factor

index 2becd937f25df767cd7df71e509b42e0792b1a95..1c94308e936b924b8007ef44573463bdee7ea135 100755 (executable)
--- a/extra/parser-combinators/regexp/regexp.factor
+++ b/extra/parser-combinators/regexp/regexp.factor
@@ -2,7 +2,7 @@ USING: arrays combinators kernel lists math math.parser
  namespaces parser lexer parser-combinators
  parser-combinators.simple promises quotations sequences strings
  math.order assocs prettyprint.backend prettyprint.custom memoize
-unicode.case unicode.categories combinators.short-circuit
+ascii unicode.categories combinators.short-circuit
  accessors make io ;
  IN: parser-combinators.regexp
author	Daniel Ehrenberg <littledan@Macintosh-103.local>
	Fri, 9 Jan 2009 01:07:46 +0000 (19:07 -0600)
committer	Daniel Ehrenberg <littledan@Macintosh-103.local>
	Fri, 9 Jan 2009 01:07:46 +0000 (19:07 -0600)
basis/ascii/ascii-docs.factor		patch \| blob \| history
basis/ascii/ascii-tests.factor		patch \| blob \| history
basis/ascii/ascii.factor		patch \| blob \| history
basis/regexp/nfa/nfa.factor		patch \| blob \| history
basis/regexp/parser/parser.factor		patch \| blob \| history
basis/soundex/soundex.factor		patch \| blob \| history
basis/tr/tr-tests.factor		patch \| blob \| history
basis/tr/tr.factor		patch \| blob \| history
basis/unicode/case/case-docs.factor		patch \| blob \| history
basis/unicode/case/case.factor		patch \| blob \| history
basis/unicode/data/data.factor		patch \| blob \| history
basis/unicode/normalize/normalize.factor		patch \| blob \| history
basis/xmode/marker/marker.factor		patch \| blob \| history
extra/benchmark/reverse-complement/reverse-complement.factor		patch \| blob \| history
extra/parser-combinators/regexp/regexp.factor		patch \| blob \| history