Moving unicode.syntax to unicode.categories.syntax; documenting and modifying syntax

author Daniel Ehrenberg <littledan@Macintosh-122.local>

Sat, 21 Mar 2009 06:11:45 +0000 (01:11 -0500)

committer Daniel Ehrenberg <littledan@Macintosh-122.local>

Sat, 21 Mar 2009 06:11:45 +0000 (01:11 -0500)
author Daniel Ehrenberg <littledan@Macintosh-122.local>
Sat, 21 Mar 2009 06:11:45 +0000 (01:11 -0500)
committer Daniel Ehrenberg <littledan@Macintosh-122.local>
Sat, 21 Mar 2009 06:11:45 +0000 (01:11 -0500)
diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor

index f397ebb2dea8836dbbd2001a51c2fc87258837b1..22d6cddfb973c40b46fff7f019ff6acd8e353556 100644 (file)
--- a/basis/unicode/breaks/breaks.factor
+++ b/basis/unicode/breaks/breaks.factor
@@ -4,7 +4,7 @@ USING: combinators.short-circuit unicode.categories kernel math
  combinators splitting sequences math.parser io.files io assocs
  arrays namespaces make math.ranges unicode.normalize
  unicode.normalize.private values io.encodings.ascii
-unicode.syntax unicode.data compiler.units fry
+unicode.data compiler.units fry unicode.categories.syntax
  alien.syntax sets accessors interval-maps memoize locals words
  simple-flat-file ;
  IN: unicode.breaks
@@ -32,9 +32,9 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
          [ drop Control ]
      } case ;
  
-CATEGORY: (extend) Me Mn ;
-: extend? ( ch -- ? )
-    { [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
+CATEGORY: extend
+    Me Mn |
+    "Other_Grapheme_Extend" property? ;
  
  : loe? ( ch -- ? )
      "Logical_Order_Exception" property? ;
diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor

index fa842b8b818a1bed743ea5e46c647877f4c7469d..1ad39317469939c54b144961b84f3df21598c440 100644 (file)
--- a/basis/unicode/case/case.factor
+++ b/basis/unicode/case/case.factor
@@ -1,8 +1,8 @@
  ! Copyright (C) 2008, 2009 Daniel Ehrenberg.
  ! See http://factorcode.org/license.txt for BSD license.
  USING: unicode.data sequences namespaces
-sbufs make unicode.syntax unicode.normalize math hints
-unicode.categories combinators unicode.syntax assocs combinators.short-circuit
+sbufs make unicode.normalize math hints
+unicode.categories combinators assocs combinators.short-circuit
  strings splitting kernel accessors unicode.breaks fry locals ;
  QUALIFIED: ascii
  IN: unicode.case
diff --git a/basis/unicode/categories/categories-docs.factor b/basis/unicode/categories/categories-docs.factor

index b0870e28fb881c90705b87383449d9bccada73bc..924b197417d74fa72ffa29dca29b85c3726e0fbc 100644 (file)
--- a/basis/unicode/categories/categories-docs.factor
+++ b/basis/unicode/categories/categories-docs.factor
@@ -12,6 +12,9 @@ HELP: Letter
  HELP: alpha
  { $class-description "The class of alphanumeric characters." } ;
  
+HELP: math
+{ $class-description "The class of Unicode math characters." } ;
+
  HELP: blank
  { $class-description "The class of whitespace characters." } ;
  
@@ -54,6 +57,8 @@ ARTICLE: "unicode.categories" "Character classes"
  { $subsection uncased }
  { $subsection uncased? }
  { $subsection character }
-{ $subsection character? } ;
+{ $subsection character? }
+{ $subsection math }
+{ $subsection math? } ;
  
  ABOUT: "unicode.categories"
diff --git a/basis/unicode/categories/categories.factor b/basis/unicode/categories/categories.factor

index 0464e31b125063b60fa21489d8865b055efd60b4..126c03c8698c431e5fea9b32be446675122f1948 100644 (file)
--- a/basis/unicode/categories/categories.factor
+++ b/basis/unicode/categories/categories.factor
@@ -1,15 +1,16 @@
  ! Copyright (C) 2008 Daniel Ehrenberg.
  ! See http://factorcode.org/license.txt for BSD license.
-USING: unicode.syntax ;
+USING: unicode.categories.syntax sequences unicode.data ;
  IN: unicode.categories
  
-CATEGORY: blank Zs Zl Zp \r\n ;
-CATEGORY: letter Ll ;
-CATEGORY: LETTER Lu ;
-CATEGORY: Letter Lu Ll Lt Lm Lo ;
+CATEGORY: blank Zs Zl Zp | "\r\n" member? ;
+CATEGORY: letter Ll | "Other_Lowercase" property? ;
+CATEGORY: LETTER Lu | "Other_Uppercase" property? ;
+CATEGORY: Letter Lu Ll Lt Lm Lo Nl ;
  CATEGORY: digit Nd Nl No ;
  CATEGORY-NOT: printable Cc Cf Cs Co Cn ;
-CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No ;
+CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ;
  CATEGORY: control Cc ;
  CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ; 
  CATEGORY-NOT: character Cn ;
+CATEGORY: math Sm | "Other_Math" property? ;
diff --git a/basis/unicode/categories/syntax/authors.txt b/basis/unicode/categories/syntax/authors.txt

new file mode 100755 (executable)

index 0000000..f990dd0
--- /dev/null
+++ b/basis/unicode/categories/syntax/authors.txt
@@ -0,0 +1 @@
+Daniel Ehrenberg
diff --git a/basis/unicode/categories/syntax/summary.txt b/basis/unicode/categories/syntax/summary.txt

new file mode 100644 (file)

index 0000000..651d51c
--- /dev/null
+++ b/basis/unicode/categories/syntax/summary.txt
@@ -0,0 +1 @@
+Parsing words used by Unicode implementation
diff --git a/basis/unicode/categories/syntax/syntax-docs.factor b/basis/unicode/categories/syntax/syntax-docs.factor

new file mode 100644 (file)

index 0000000..6293b92
--- /dev/null
+++ b/basis/unicode/categories/syntax/syntax-docs.factor
@@ -0,0 +1,19 @@
+! Copyright (C) 2008 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.syntax help.markup ;
+IN: unicode.categories.syntax
+
+ABOUT: "unicode.categories.syntax"
+
+ARTICLE: "unicode.categories.syntax" "Unicode category syntax"
+"There is special syntax sugar for making predicate classes which are unions of Unicode general categories, plus some other code."
+{ $subsection POSTPONE: CATEGORY: }
+{ $subsection POSTPONE: CATEGORY-NOT: } ;
+
+HELP: CATEGORY:
+{ $syntax "CATEGORY: foo Nl Pd Lu | \"Diacritic\" property? ;" }
+{ $description "This defines a predicate class which is a subset of code points. In this example, " { $snippet "foo" } " is the class of characters which are in the general category Nl or Pd or Lu, or which have the Diacritic property." } ;
+
+HELP: CATEGORY-NOT:
+{ $syntax "CATEGORY-NOT: foo Nl Pd Lu | \"Diacritic\" property? ;" }
+{ $description "This defines a predicate class which is a subset of code points, the complement of what " { $link POSTPONE: CATEGORY: } " would define. In this example, " { $snippet "foo" } " is the class of characters which are neither in the general category Nl or Pd or Lu, nor have the Diacritic property." } ;
diff --git a/basis/unicode/categories/syntax/syntax-tests.factor b/basis/unicode/categories/syntax/syntax-tests.factor

new file mode 100644 (file)

index 0000000..1ec622f
--- /dev/null
+++ b/basis/unicode/categories/syntax/syntax-tests.factor
@@ -0,0 +1,3 @@
+! Copyright (C) 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+
diff --git a/basis/unicode/categories/syntax/syntax.factor b/basis/unicode/categories/syntax/syntax.factor

new file mode 100644 (file)

index 0000000..593bb0b
--- /dev/null
+++ b/basis/unicode/categories/syntax/syntax.factor
@@ -0,0 +1,36 @@
+! Copyright (C) 2008, 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: unicode.data kernel math sequences parser
+bit-arrays namespaces sequences.private arrays classes.parser
+assocs classes.predicate sets fry splitting accessors ;
+IN: unicode.categories.syntax
+
+! For use in CATEGORY:
+SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co | ;
+
+<PRIVATE
+
+: >category-array ( categories -- bitarray )
+    categories [ swap member? ] with map >bit-array ;
+
+: [category] ( categories code -- quot )
+    [ >category-array ] dip
+    '[ dup category# _ nth-unsafe [ drop t ] _ if ] ;
+
+: define-category ( word categories code -- )
+    [category] integer swap define-predicate-class ;
+
+: parse-category ( -- word tokens quot )
+    CREATE-CLASS \ ; parse-until { | } split1
+    [ [ name>> ] map ]
+    [ [ [ ] like ] [ [ drop f ] ] if* ] bi* ;
+
+PRIVATE>
+
+: CATEGORY:
+    parse-category define-category ; parsing
+
+: CATEGORY-NOT:
+    ! Negate the whole CATEGORY: predicate so the code after | is complemented too, not OR-ed in.
+    parse-category [category] [ not ] compose
+    integer swap define-predicate-class ; parsing
diff --git a/basis/unicode/categories/syntax/tags.txt b/basis/unicode/categories/syntax/tags.txt

new file mode 100755 (executable)

index 0000000..8e27be7
--- /dev/null
+++ b/basis/unicode/categories/syntax/tags.txt
@@ -0,0 +1 @@
+text
diff --git a/basis/unicode/collation/collation.factor b/basis/unicode/collation/collation.factor

index 0c51ea4352efda97386b7528886f4bc2639a288c..b6eddccae074f7257f9226af3e3f217c6b02bb5e 100755 (executable)
--- a/basis/unicode/collation/collation.factor
+++ b/basis/unicode/collation/collation.factor
@@ -4,7 +4,7 @@ USING: combinators.short-circuit sequences io.files
  io.encodings.ascii kernel values splitting accessors math.parser\r
  ascii io assocs strings math namespaces make sorting combinators\r
  math.order arrays unicode.normalize unicode.data locals\r
-unicode.syntax macros sequences.deep words unicode.breaks\r
+macros sequences.deep words unicode.breaks\r
  quotations combinators.short-circuit simple-flat-file ;\r
  IN: unicode.collation\r
  \r
diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor

index 602d9555ea64c26d775f0057cd8b3140b0f1c43f..aca96a56942c315303dc84afd4c52a9061883c7c 100644 (file)
--- a/basis/unicode/normalize/normalize.factor
+++ b/basis/unicode/normalize/normalize.factor
@@ -2,7 +2,7 @@
  ! See http://factorcode.org/license.txt for BSD license.
  USING: ascii sequences namespaces make unicode.data kernel math arrays
  locals sorting.insertion accessors assocs math.order combinators
-unicode.syntax strings sbufs hints combinators.short-circuit vectors ;
+strings sbufs hints combinators.short-circuit vectors ;
  IN: unicode.normalize
  
  <PRIVATE
diff --git a/basis/unicode/syntax/authors.txt b/basis/unicode/syntax/authors.txt

deleted file mode 100755 (executable)

index f990dd0..0000000
--- a/basis/unicode/syntax/authors.txt
+++ /dev/null
@@ -1 +0,0 @@
-Daniel Ehrenberg
diff --git a/basis/unicode/syntax/summary.txt b/basis/unicode/syntax/summary.txt

deleted file mode 100644 (file)

index 651d51c..0000000
--- a/basis/unicode/syntax/summary.txt
+++ /dev/null
@@ -1 +0,0 @@
-Parsing words used by Unicode implementation
diff --git a/basis/unicode/syntax/syntax.factor b/basis/unicode/syntax/syntax.factor

deleted file mode 100644 (file)

index a42adb4..0000000
--- a/basis/unicode/syntax/syntax.factor
+++ /dev/null
@@ -1,35 +0,0 @@
-! Copyright (C) 2008 Daniel Ehrenberg.
-! See http://factorcode.org/license.txt for BSD license.
-USING: unicode.data kernel math sequences parser lexer
-bit-arrays namespaces make sequences.private arrays quotations
-assocs classes.predicate math.order strings.parser sets ;
-IN: unicode.syntax
-
-<PRIVATE
-
-: >category-array ( categories -- bitarray )
-    categories [ swap member? ] with map >bit-array ;
-
-: as-string ( strings -- bit-array )
-    concat unescape-string ;
-
-: [category] ( categories -- quot )
-    [
-        [ [ categories member? not ] filter as-string ] keep 
-        [ categories member? ] filter >category-array
-        [ dup category# ] % , [ nth-unsafe [ drop t ] ] %
-        \ member? 2array >quotation ,
-        \ if ,
-    ] [ ] make ;
-
-: define-category ( word categories -- )
-    [category] integer swap define-predicate-class ;
-
-PRIVATE>
-
-: CATEGORY:
-    CREATE ";" parse-tokens define-category ; parsing
-
-: CATEGORY-NOT:
-    CREATE ";" parse-tokens
-    categories swap diff define-category ; parsing
diff --git a/basis/unicode/syntax/tags.txt b/basis/unicode/syntax/tags.txt

deleted file mode 100755 (executable)

index 8e27be7..0000000
--- a/basis/unicode/syntax/tags.txt
+++ /dev/null
@@ -1 +0,0 @@
-text
diff --git a/basis/unicode/unicode-docs.factor b/basis/unicode/unicode-docs.factor

index 4ae326ac84bf3429c33edb0960b4856fff625277..9450b49f0bd2f14bae20dcc19c15ecfb7093f92c 100644 (file)
--- a/basis/unicode/unicode-docs.factor
+++ b/basis/unicode/unicode-docs.factor
@@ -15,7 +15,7 @@ $nl
  { $vocab-subsection "Word and grapheme breaks" "unicode.breaks" }
  { $vocab-subsection "Unicode normalization" "unicode.normalize" }
  "The following are mostly for internal use:"
-{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
+{ $vocab-subsection "Unicode category syntax" "unicode.categories.syntax" }
  { $vocab-subsection "Unicode data tables" "unicode.data" }
  { $see-also "ascii" "io.encodings" } ;
  
diff --git a/basis/xml/char-classes/char-classes.factor b/basis/xml/char-classes/char-classes.factor

index d510c8a881d47e8d9538db82b0653b0d1b7b3be3..153fca0bb743fffe8d9a4e37f5e4a8b47c8bfbf1 100644 (file)
--- a/basis/xml/char-classes/char-classes.factor
+++ b/basis/xml/char-classes/char-classes.factor
@@ -1,19 +1,26 @@
  ! Copyright (C) 2005, 2009 Daniel Ehrenberg
  ! See http://factorcode.org/license.txt for BSD license.
-USING: kernel sequences unicode.syntax math math.order combinators
-hints ;
+USING: kernel sequences unicode.categories.syntax math math.order
+combinators hints combinators.short-circuit ;
  IN: xml.char-classes
  
-CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
-: 1.0name-start? ( char -- ? )
-    dup 1.0name-start*? [ drop t ] 
-    [ HEX: 2BB HEX: 2C1 between? ] if ;
+CATEGORY: 1.0name-start
+    Ll Lu Lo Lt Nl | {
+        [ HEX: 2BB HEX: 2C1 between? ]
+        [ "\u000559\u0006E5\u0006E6_:" member? ]
+    } 1|| ;
  
-CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
+CATEGORY: 1.0name-char
+    Ll Lu Lo Lt Nl Mc Me Mn Lm Nd |
+    "_-.\u000387:" member? ;
  
-CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
+! Ln removed: it is not a Unicode general category and has no symbol in unicode.categories.syntax.
+CATEGORY: 1.1name-start Ll Lu Lo Lm Nl |
+    "_:" member? ;
  
-CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
+! Ln removed: it is not a Unicode general category and has no symbol in unicode.categories.syntax.
+CATEGORY: 1.1name-char Ll Lu Lo Lm Nl Mc Mn Nd Pc Cf |
+    "_-.\u0000b7:" member? ;
  
  : name-start? ( 1.0? char -- ? )
      swap [ 1.0name-start? ] [ 1.1name-start? ] if ;
author	Daniel Ehrenberg <littledan@Macintosh-122.local>
	Sat, 21 Mar 2009 06:11:45 +0000 (01:11 -0500)
committer	Daniel Ehrenberg <littledan@Macintosh-122.local>
	Sat, 21 Mar 2009 06:11:45 +0000 (01:11 -0500)
basis/unicode/breaks/breaks.factor		patch \| blob \| history
basis/unicode/case/case.factor		patch \| blob \| history
basis/unicode/categories/categories-docs.factor		patch \| blob \| history
basis/unicode/categories/categories.factor		patch \| blob \| history
basis/unicode/categories/syntax/authors.txt	[new file with mode: 0755]	patch \| blob
basis/unicode/categories/syntax/summary.txt	[new file with mode: 0644]	patch \| blob
basis/unicode/categories/syntax/syntax-docs.factor	[new file with mode: 0644]	patch \| blob
basis/unicode/categories/syntax/syntax-tests.factor	[new file with mode: 0644]	patch \| blob
basis/unicode/categories/syntax/syntax.factor	[new file with mode: 0644]	patch \| blob
basis/unicode/categories/syntax/tags.txt	[new file with mode: 0755]	patch \| blob
basis/unicode/collation/collation.factor		patch \| blob \| history
basis/unicode/normalize/normalize.factor		patch \| blob \| history
basis/unicode/syntax/authors.txt	[deleted file]	patch \| blob \| history
basis/unicode/syntax/summary.txt	[deleted file]	patch \| blob \| history
basis/unicode/syntax/syntax.factor	[deleted file]	patch \| blob \| history
basis/unicode/syntax/tags.txt	[deleted file]	patch \| blob \| history
basis/unicode/unicode-docs.factor		patch \| blob \| history
basis/xml/char-classes/char-classes.factor		patch \| blob \| history