Rewriting basis/wrap with Knuth's algorithm. Minor API changes will probably break...

author Daniel Ehrenberg <littledan@Macintosh-103.local>

Sun, 8 Feb 2009 23:13:28 +0000 (17:13 -0600)

committer Daniel Ehrenberg <littledan@Macintosh-103.local>

Sun, 8 Feb 2009 23:13:28 +0000 (17:13 -0600)
author Daniel Ehrenberg <littledan@Macintosh-103.local>
Sun, 8 Feb 2009 23:13:28 +0000 (17:13 -0600)
committer Daniel Ehrenberg <littledan@Macintosh-103.local>
Sun, 8 Feb 2009 23:13:28 +0000 (17:13 -0600)
diff --git a/basis/wrap/wrap-docs.factor b/basis/wrap/wrap-docs.factor

index c94e12907f369ca119ac99d869baac4e9b4faf09..09ddec36edf8653b407389b2ef2b36afe6100df5 100644 (file)
--- a/basis/wrap/wrap-docs.factor
+++ b/basis/wrap/wrap-docs.factor
@@ -10,10 +10,10 @@ ARTICLE: "wrap" "Word wrapping"
  { $subsection wrap-lines }
  { $subsection wrap-string }
  { $subsection wrap-indented-string }
-"Additionally, the vocabulary provides capabilities to wrap arbitrary groups of things, in units called words."
-{ $subsection wrap }
-{ $subsection word }
-{ $subsection <word> } ;
+"Additionally, the vocabulary provides capabilities to wrap arbitrary groups of things, in units called elements."
+{ $subsection wrap-elements }
+{ $subsection element }
+{ $subsection <element> } ;
  
  HELP: wrap-lines
  { $values { "lines" string } { "width" integer } { "newlines" "sequence of strings" } }
@@ -27,15 +27,15 @@ HELP: wrap-indented-string
  { $values { "string" string } { "width" integer } { "indent" string } { "newstring" string } }
  { $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space. Before each line, the indent string is added." } ;
  
-HELP: wrap
-{ $values { "words" { "a sequence of " { $instance word } "s" } } { "width" integer } { "lines" "a sequence of sequences of words" } }
-{ $description "Divides the words into lines, where the sum of the lengths of the words on a line (not counting breaks at the end of the line) is at most the given width. Every line except for the first one starts with a non-break, and every one but the last ends with a break." } ;
+HELP: wrap-elements
+{ $values { "elements" { "a sequence of " { $instance element } "s" } } { "line-max" integer } { "line-ideal" integer } { "lines" "a sequence of sequences of words" } }
+{ $description "Divides the words into lines, where the sum of the lengths of the words on a line (not counting breaks at the end of the line) is at most the given maximum. The returned set of lines is optimized to minimize the square of the deviation of each line from the ideal width. It is not guaranteed to be the minimal number of lines. Every line except for the first one starts with a non-break, and every one but the last ends with a break." } ;
  
-HELP: word
-{ $class-description "A word, for the purposes of " { $vocab-link "wrap" } ", is a Factor object annotated with a length (in the " { $snippet "width" } " slot) and knowledge about whether it is an allowable position for an optional line break (in the " { $snippet "break?" } " slot). Words can be created with " { $link <word> } "." }
-{ $see-also wrap } ;
+HELP: element
+{ $class-description "An element is a Factor object annotated with a length (in the " { $snippet "width" } " slot) and knowledge about whether it is an allowable position for an optional line break (in the " { $snippet "break?" } " slot). Elements can be created with " { $link <element> } "." }
+{ $see-also wrap-elements } ;
  
-HELP: <word>
-{ $values { "key" object } { "width" integer } { "break?" { { $link t } " or " { $link POSTPONE: f } } } { "word" word } }
-{ $description "Creates a " { $link word } " object with the given parameters." }
-{ $see-also wrap } ;
+HELP: <element>
+{ $values { "key" object } { "width" integer } { "break?" { { $link t } " or " { $link POSTPONE: f } } } { "element" element } }
+{ $description "Creates an " { $link element } " object with the given parameters." }
+{ $see-also wrap-elements } ;
diff --git a/basis/wrap/wrap-tests.factor b/basis/wrap/wrap-tests.factor

index ba5168a1c2b4e958fd2d3f928e3b0d6ff9b5c8b1..98d0b712f72793fa47a3a577d51550783eecc633 100644 (file)
--- a/basis/wrap/wrap-tests.factor
+++ b/basis/wrap/wrap-tests.factor
@@ -6,49 +6,77 @@ IN: wrap.tests
  [
      {
          {
-            T{ word f 1 10 f }
-            T{ word f 2 10 f }
-            T{ word f 3 2 t }
+            T{ element f 1 10 f }
+            T{ element f 2 10 f }
+            T{ element f 3 2 t }
          }
          {
-            T{ word f 4 10 f }
-            T{ word f 5 10 f }
+            T{ element f 4 10 f }
+            T{ element f 5 10 f }
          }
      }
  ] [
      {
-        T{ word f 1 10 f }
-        T{ word f 2 10 f }
-        T{ word f 3 2 t }
-        T{ word f 4 10 f }
-        T{ word f 5 10 f }
-    } 35 wrap [ { } like ] map
+        T{ element f 1 10 f }
+        T{ element f 2 10 f }
+        T{ element f 3 2 t }
+        T{ element f 4 10 f }
+        T{ element f 5 10 f }
+    } 35 35 wrap-elements [ { } like ] map
  ] unit-test
  
  [
      {
          {
-            T{ word f 1 10 f }
-            T{ word f 2 10 f }
-            T{ word f 3 9 t }
-            T{ word f 3 9 t }
-            T{ word f 3 9 t }
+            T{ element f 1 10 f }
+            T{ element f 2 10 f }
+            T{ element f 3 9 t }
+            T{ element f 3 9 t }
+            T{ element f 3 9 t }
          }
          {
-            T{ word f 4 10 f }
-            T{ word f 5 10 f }
+            T{ element f 4 10 f }
+            T{ element f 5 10 f }
          }
      }
  ] [
      {
-        T{ word f 1 10 f }
-        T{ word f 2 10 f }
-        T{ word f 3 9 t }
-        T{ word f 3 9 t }
-        T{ word f 3 9 t }
-        T{ word f 4 10 f }
-        T{ word f 5 10 f }
-    } 35 wrap [ { } like ] map
+        T{ element f 1 10 f }
+        T{ element f 2 10 f }
+        T{ element f 3 9 t }
+        T{ element f 3 9 t }
+        T{ element f 3 9 t }
+        T{ element f 4 10 f }
+        T{ element f 5 10 f }
+    } 35 35 wrap-elements [ { } like ] map
+] unit-test
+
+[
+    {
+        {
+            T{ element f 1 10 t }
+            T{ element f 1 10 f }
+            T{ element f 3 9 t }
+        }
+        {
+            T{ element f 2 10 f }
+            T{ element f 3 9 t }
+        }
+        {
+            T{ element f 4 10 f }
+            T{ element f 5 10 f }
+        }
+    }
+] [
+    {
+        T{ element f 1 10 t }
+        T{ element f 1 10 f }
+        T{ element f 3 9 t }
+        T{ element f 2 10 f }
+        T{ element f 3 9 t }
+        T{ element f 4 10 f }
+        T{ element f 5 10 f }
+    } 35 35 wrap-elements [ { } like ] map
  ] unit-test
  
  [
@@ -75,8 +103,13 @@ word wrap.">
      "  " wrap-indented-string
  ] unit-test
  
-[ "this text\nhas lots of\nspaces" ]
+[ "this text\nhas lots\nof spaces" ]
  [ "this text        has lots of       spaces" 12 wrap-string ] unit-test
  
  [ "hello\nhow\nare\nyou\ntoday?" ]
  [ "hello how are you today?" 3 wrap-string ] unit-test
+
+[ "aaa\nbb cc\nddddd" ] [ "aaa bb cc ddddd" 6 wrap-string ] unit-test
+[ "aaa\nbb ccc\ndddddd" ] [ "aaa bb ccc dddddd" 6 wrap-string ] unit-test
+[ "aaa bb\ncccc\nddddd" ] [ "aaa bb cccc ddddd" 6 wrap-string ] unit-test
+[ "aaa bb\nccccccc\nddddddd" ] [ "aaa bb ccccccc ddddddd" 6 wrap-string ] unit-test
diff --git a/basis/wrap/wrap.factor b/basis/wrap/wrap.factor

index e93509b58e4bab5c2141f784a34ebd1cd5bb2003..458d2f86d1ab074458e44ff4c48af91e8cc67fa9 100644 (file)
--- a/basis/wrap/wrap.factor
+++ b/basis/wrap/wrap.factor
@@ -1,70 +1,165 @@
-! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
-! See http://factorcode.org/license.txt for BSD license.
-USING: sequences kernel namespaces make splitting
-math math.order fry assocs accessors ;
+USING: kernel sequences math arrays locals fry accessors splitting
+make combinators.short-circuit namespaces grouping splitting.monotonic ;
  IN: wrap
  
-! Word wrapping/line breaking -- not Unicode-aware
-
-TUPLE: word key width break? ;
+<PRIVATE
  
+! black is the text length, white is the whitespace length
+TUPLE: word contents black white ;
  C: <word> word
  
-<PRIVATE
+: word-length ( word -- n )
+    [ black>> ] [ white>> ] bi + ;
+
+TUPLE: cons cdr car ; ! This order works out better
+C: <cons> cons
+
+: >cons< ( cons -- cdr car )
+    [ cdr>> ] [ car>> ] bi ;
+
+: list-each ( list quot -- )
+    over [
+        [ [ car>> ] dip call ]
+        [ [ cdr>> ] dip list-each ] 2bi
+    ] [ 2drop ] if ; inline recursive
+
+: singleton? ( list -- ? )
+    { [ ] [ cdr>> not ] } 1&& ;
+
+: <singleton> ( elt -- list )
+    f swap <cons> ;
+
+: list>array ( list -- array )
+    [ [ , ] list-each ] { } make ;
+
+: lists>arrays ( lists -- arrays )
+    [ [ list>array , ] list-each ] { } make ;
+
+TUPLE: paragraph lines head-width tail-cost ;
+C: <paragraph> paragraph
+
+SYMBOL: line-max
+SYMBOL: line-ideal
+
+: deviation ( length -- n )
+    line-ideal get - sq ;
+
+: top-fits? ( paragraph -- ? )
+    [ head-width>> ]
+    [ lines>> singleton? line-ideal line-max ? get ] bi <= ;
+
+: fits? ( paragraph -- ? )
+    ! Make this not count spaces at end
+    { [ lines>> car>> singleton? ] [ top-fits? ] } 1|| ;
+
+:: min-by ( seq quot -- elt )
+    f 1.0/0.0 seq [| key value new |
+        new quot call :> newvalue
+        newvalue value < [ new newvalue ] [ key value ] if
+    ] each drop ; inline
+
+: paragraph-cost ( paragraph -- cost )
+    [ head-width>> deviation ]
+    [ tail-cost>> ] bi + ;
+
+: min-cost ( paragraphs -- paragraph )
+    [ paragraph-cost ] min-by ;
+
+: new-line ( paragraph word -- paragraph )
+    [ [ lines>> ] [ <singleton> ] bi* <cons> ]
+    [ nip black>> ]
+    [ drop paragraph-cost ] 2tri
+    <paragraph> ;
  
-SYMBOL: width
+: glue ( paragraph word -- paragraph )
+    [ [ lines>> >cons< ] dip <cons> <cons> ]
+    [ [ head-width>> ] [ word-length ] bi* + ]
+    [ drop tail-cost>> ] 2tri
+    <paragraph> ;
  
-: break-here? ( column word -- ? )
-    break?>> not [ width get > ] [ drop f ] if ;
+: wrap-step ( paragraphs word -- paragraphs )
+    [ '[ _ glue ] map ]
+    [ [ min-cost ] dip new-line ]
+    2bi prefix
+    [ fits? ] filter ;
  
-: walk ( n words -- n )
-    ! If on a break, take the rest of the breaks
-    ! If not on a break, go back until you hit a break
-    2dup bounds-check? [
-        2dup nth break?>>
-        [ [ break?>> not ] find-from drop ]
-        [ [ break?>> ] find-last-from drop 1+ ] if
-   ] [ drop ] if ;
+: 1paragraph ( word -- paragraph )
+    [ <singleton> <singleton> ]
+    [ black>> ] bi
+    0 <paragraph> ;
  
-: find-optimal-break ( words -- n )
-    [ 0 ] keep
-    [ [ width>> + dup ] keep break-here? ] find drop nip
-    [ 1 max swap walk ] [ drop f ] if* ;
+: post-process ( paragraph -- array )
+    lines>> lists>arrays
+    [ [ contents>> ] map ] map ;
  
-: (wrap) ( words -- )
+: initialize ( words -- words paragraph )
+    <reversed> unclip-slice 1paragraph 1array ;
+
+: wrap ( words line-max line-ideal -- paragraph )
      [
-        dup find-optimal-break
-        [ cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if*
-    ] unless-empty ;
+        line-ideal set
+        line-max set
+        initialize
+        [ wrap-step ] reduce
+        min-cost
+        post-process
+    ] with-scope ;
+
+PRIVATE>
+
+TUPLE: element key width break? ;
+C: <element> element
+
+<PRIVATE
+
+: elements-length ( elements -- length )
+    [ width>> ] map sum ;
+
+: make-word ( whites blacks -- word )
+    [ append ] [ [ elements-length ] bi@ ] 2bi <word> ;
+ 
+: ?first2 ( seq -- first/f second/f )
+    [ 0 swap ?nth ]
+    [ 1 swap ?nth ] bi ;
+
+: split-elements ( seq -- half-words )
+    [ [ break?>> ] bi@ = ] monotonic-split ;
+
+: ?first-break ( seq -- newseq f/word )
+    dup first first break?>>
+    [ unclip-slice f swap make-word ]
+    [ f ] if ;
  
-: intersperse ( seq elt -- seq' )
-    [ '[ _ , ] [ , ] interleave ] { } make ;
+: make-words ( seq f/word -- words )
+    [ 2 <groups> [ ?first2 make-word ] map ] dip
+    [ prefix ] when* ;
+
+: elements>words ( seq -- newseq )
+    split-elements ?first-break make-words ;
+
+PRIVATE>
+
+: wrap-elements ( elements line-max line-ideal -- lines )
+    [ elements>words ] 2dip wrap [ concat ] map ;
+
+<PRIVATE
  
  : split-lines ( string -- words-lines )
      string-lines [
          " \t" split harvest
-        [ dup length f <word> ] map
-        " " 1 t <word> intersperse
+        [ dup length 1 <word> ] map
      ] map ;
  
  : join-words ( wrapped-lines -- lines )
-    [
-        [ break?>> ] trim-slice
-        [ key>> ] map concat
-    ] map ;
+    [ " " join ] map ;
  
  : join-lines ( strings -- string )
      "\n" join ;
  
  PRIVATE>
  
-: wrap ( words width -- lines )
-    width [
-        [ (wrap) ] { } make
-    ] with-variable ;
-
  : wrap-lines ( lines width -- newlines )
-    [ split-lines ] dip '[ _ wrap join-words ] map concat ;
+    [ split-lines ] dip '[ _ dup wrap join-words ] map concat ;
  
  : wrap-string ( string width -- newstring )
      wrap-lines join-lines ;
author	Daniel Ehrenberg <littledan@Macintosh-103.local>
	Sun, 8 Feb 2009 23:13:28 +0000 (17:13 -0600)
committer	Daniel Ehrenberg <littledan@Macintosh-103.local>
	Sun, 8 Feb 2009 23:13:28 +0000 (17:13 -0600)
basis/wrap/wrap-docs.factor		patch \| blob \| history
basis/wrap/wrap-tests.factor		patch \| blob \| history
basis/wrap/wrap.factor		patch \| blob \| history