Delete empty unit test files, remove 1- and 1+, reorder IN: lines in a lot of places...

[factor.git] / basis / unicode / breaks / breaks.factor
diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor

index ddcb99b829dba82cbd772e004f786348d06f1c8b..7c7b8a1f50771499672eb752680021570141ccd4 100644 (file)
--- a/basis/unicode/breaks/breaks.factor
+++ b/basis/unicode/breaks/breaks.factor
@@ -2,9 +2,11 @@
  ! See http://factorcode.org/license.txt for BSD license.
  USING: combinators.short-circuit unicode.categories kernel math
  combinators splitting sequences math.parser io.files io assocs
-arrays namespaces make math.ranges unicode.normalize.private values
-io.encodings.ascii unicode.syntax unicode.data compiler.units fry
-alien.syntax sets accessors interval-maps memoize locals words ;
+arrays namespaces make math.ranges unicode.normalize
+unicode.normalize.private values io.encodings.ascii
+unicode.data compiler.units fry unicode.categories.syntax
+alien.syntax sets accessors interval-maps memoize locals words
+simple-flat-file ;
  IN: unicode.breaks
  
  <PRIVATE
@@ -30,9 +32,9 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
          [ drop Control ]
      } case ;
  
-CATEGORY: (extend) Me Mn ;
-: extend? ( ch -- ? )
-    { [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
+CATEGORY: extend
+    Me Mn |
+    "Other_Grapheme_Extend" property? ;
  
  : loe? ( ch -- ? )
      "Logical_Order_Exception" property? ;
@@ -58,7 +60,7 @@ SYMBOL: table
  : finish-table ( -- table )
      table get [ [ 1 = ] map ] map ;
  
-: eval-seq ( seq -- seq ) [ dup word? [ execute ] when ] map ;
+: eval-seq ( seq -- seq ) [ ?execute ] map ;
  
  : (set-table) ( class1 class2 val -- )
      [ table get nth ] dip '[ _ or ] change-nth ;
@@ -70,9 +72,6 @@ SYMBOL: table
  : connect ( class1 class2 -- ) 1 set-table ;
  : disconnect ( class1 class2 -- ) 0 set-table ;
    
-: break-around ( classes1 classes2 -- )
-    [ disconnect ] [ swap disconnect ] 2bi ;
-
  : make-grapheme-table ( -- )
      { CR } { LF } connect
      { Control CR LF } graphemes disconnect
@@ -89,15 +88,22 @@ VALUE: grapheme-table
  : grapheme-break? ( class1 class2 -- ? )
      grapheme-table nth nth not ;
  
-: chars ( i str n -- str[i] str[i+n] )
-    swap [ dupd + ] dip [ ?nth ] curry bi@ ;
-
  PRIVATE>
  
  : first-grapheme ( str -- i )
      unclip-slice grapheme-class over
      [ grapheme-class [ nip ] [ grapheme-break? ] 2bi ] find drop
-    nip swap length or 1+ ;
+    nip swap length or 1 + ;
+
+: first-grapheme-from ( start str -- i )
+    over tail-slice first-grapheme + ;
+
+: last-grapheme ( str -- i )
+    unclip-last-slice grapheme-class swap
+    [ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ;
+
+: last-grapheme-from ( end str -- i )
+    swap head-slice last-grapheme ;
  
  <PRIVATE
  
@@ -112,10 +118,6 @@ PRIVATE>
  : string-reverse ( str -- rts )
      >graphemes reverse concat ;
  
-: last-grapheme ( str -- i )
-    unclip-last-slice grapheme-class swap
-    [ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ;
-
  <PRIVATE
  
  graphemes init-table table
@@ -126,7 +128,7 @@ to: grapheme-table
  
  VALUE: word-break-table
  
-"vocab:unicode/data/WordBreakProperty.txt" load-script
+"vocab:unicode/data/WordBreakProperty.txt" load-interval-file
  to: word-break-table
  
  C-ENUM: wOther wCR wLF wNewline wExtend wFormat wKatakana wALetter wMidLetter
@@ -190,13 +192,13 @@ to: word-table
      swap [ format/extended? not ] find-from drop ;
  
  : walk-up ( str i -- j )
-    dupd 1+ (walk-up) [ 1+ (walk-up) ] [ drop f ] if* ;
+    dupd 1 + (walk-up) [ 1 + (walk-up) ] [ drop f ] if* ;
  
  : (walk-down) ( str i -- j )
      swap [ format/extended? not ] find-last-from drop ;
  
  : walk-down ( str i -- j )
-    dupd (walk-down) [ 1- (walk-down) ] [ drop f ] if* ;
+    dupd (walk-down) [ 1 - (walk-down) ] [ drop f ] if* ;
  
  : word-break? ( str i table-entry -- ? )
      {
@@ -224,7 +226,33 @@ PRIVATE>
  : first-word ( str -- i )
      [ unclip-slice word-break-prop over <enum> ] keep
      '[ swap _ word-break-next ] assoc-find 2drop
-    nip swap length or 1+ ;
+    nip swap length or 1 + ;
  
  : >words ( str -- words )
      [ first-word ] >pieces ;
+
+<PRIVATE
+
+: nth-next ( i str -- str[i-1] str[i] )
+    [ [ 1 - ] keep ] dip '[ _ nth ] bi@ ;
+
+PRIVATE>
+
+: word-break-at? ( i str -- ? )
+    {
+        [ drop zero? ]
+        [ length = ]
+        [
+            [ nth-next [ word-break-prop ] dip ] 2keep
+            word-break-next nip
+        ]
+    } 2|| ;
+
+: first-word-from ( start str -- i )
+    over tail-slice first-word + ;
+
+: last-word ( str -- i )
+    [ length ] keep '[ _ word-break-at? ] find-last drop 0 or ;
+
+: last-word-from ( end str -- i )
+    swap head-slice last-word ;