! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math math.order words combinators locals
ascii unicode.categories combinators.short-circuit sequences
-fry macros arrays assocs sets classes mirrors ;
+fry macros arrays assocs sets classes mirrors unicode.script
+unicode.data ;
IN: regexp.classes
SINGLETONS: dot letter-class LETTER-class Letter-class digit-class
SINGLETONS: beginning-of-input ^ end-of-input $ end-of-file
^unix $unix word-break ;
-TUPLE: range from to ;
-C: <range> range
+TUPLE: range-class from to ;
+C: <range-class> range-class
TUPLE: primitive-class class ;
C: <primitive-class> primitive-class
M: integer class-member? ( obj class -- ? ) = ;
-M: range class-member? ( obj class -- ? )
+M: range-class class-member? ( obj class -- ? )
[ from>> ] [ to>> ] bi between? ;
M: letter-class class-member? ( obj class -- ? )
TUPLE: not-class class ;
PREDICATE: not-integer < not-class class>> integer? ;
-PREDICATE: not-primitive < not-class class>> primitive-class? ;
+
+UNION: simple-class
+ primitive-class range-class category-class category-range-class dot ;
+PREDICATE: not-simple < not-class class>> simple-class? ;
M: not-class class-member?
class>> class-member? not ;
[ drop class new seq { } like >>seq ]
} case ; inline
-TUPLE: class-partition integers not-integers primitives not-primitives and or other ;
+TUPLE: class-partition integers not-integers simples not-simples and or other ;
: partition-classes ( seq -- class-partition )
prune
[ integer? ] partition
[ not-integer? ] partition
- [ primitive-class? ] partition ! extend primitive-class to epsilon tags
- [ not-primitive? ] partition
+ [ simple-class? ] partition
+ [ not-simple? ] partition
[ and-class? ] partition
[ or-class? ] partition
class-partition boa ;
: filter-not-integers ( partition -- partition' )
dup
- [ primitives>> ] [ not-primitives>> ] [ or>> ] tri
+ [ simples>> ] [ not-simples>> ] [ or>> ] tri
3append and-class boa
'[ [ class>> _ class-member? ] filter ] change-not-integers ;
: answer-ors ( partition -- partition' )
- dup [ not-integers>> ] [ not-primitives>> ] [ primitives>> ] tri 3append
+ dup [ not-integers>> ] [ not-simples>> ] [ simples>> ] tri 3append
'[ [ _ [ t substitute ] each ] map ] change-or ;
: contradiction? ( partition -- ? )
{
- [ [ primitives>> ] [ not-primitives>> ] bi intersects? ]
+ [ [ simples>> ] [ not-simples>> ] bi intersects? ]
[ other>> f swap member? ]
} 1|| ;
: filter-integers ( partition -- partition' )
dup
- [ primitives>> ] [ not-primitives>> ] [ and>> ] tri
+ [ simples>> ] [ not-simples>> ] [ and>> ] tri
3append or-class boa
'[ [ _ class-member? not ] filter ] change-integers ;
: answer-ands ( partition -- partition' )
- dup [ integers>> ] [ not-primitives>> ] [ primitives>> ] tri 3append
+ dup [ integers>> ] [ not-simples>> ] [ simples>> ] tri 3append
'[ [ _ [ f substitute ] each ] map ] change-and ;
: tautology? ( partition -- ? )
{
- [ [ primitives>> ] [ not-primitives>> ] bi intersects? ]
+ [ [ simples>> ] [ not-simples>> ] bi intersects? ]
[ other>> t swap member? ]
} 1|| ;
M: primitive-class class-member?
class>> class-member? ;
-UNION: class primitive-class not-class or-class and-class range ;
-
TUPLE: condition question yes no ;
C: <condition> condition
sequences fry quotations math.order math.ranges vectors
unicode.categories regexp.transition-tables words sets hashtables
combinators.short-circuit unicode.case unicode.case.private regexp.ast
-regexp.classes ;
+regexp.classes memoize ;
IN: regexp.nfa
! This uses unicode.case.private for ch>upper and ch>lower
[ [ LETTER? ] bi@ and ]
} 2|| ;
-M: range modify-class
+M: range-class modify-class
case-insensitive option? [
dup cased-range? [
[ from>> ] [ to>> ] bi
- [ [ ch>lower ] bi@ <range> ]
- [ [ ch>upper ] bi@ <range> ] 2bi
+ [ [ ch>lower ] bi@ <range-class> ]
+ [ [ ch>upper ] bi@ <range-class> ] 2bi
2array <or-class>
] when
] when ;
-M: class nfa-node
+M: object nfa-node
modify-class add-simple-entry ;
M: with-options nfa-node ( node -- start end )
: char>name ( char -- name ) name-map value-at ;
: property? ( char property -- ? ) properties at interval-key? ;
+! For non-existent characters, use Cn
+CONSTANT: categories
+ { "Cn"
+ "Lu" "Ll" "Lt" "Lm" "Lo"
+ "Mn" "Mc" "Me"
+ "Nd" "Nl" "No"
+ "Pc" "Pd" "Ps" "Pe" "Pi" "Pf" "Po"
+ "Sm" "Sc" "Sk" "So"
+ "Zs" "Zl" "Zp"
+ "Cc" "Cf" "Cs" "Co" }
+
+MEMO: categories-map ( -- hashtable )
+ categories <enum> [ swap ] H{ } assoc-map-as ;
+
+CONSTANT: num-chars HEX: 2FA1E
+
: category# ( char -- category )
! There are a few characters that should be Cn
! that this gives Cf or Mn
[ nip zero? not ] assoc-filter
>hashtable ;
-! For non-existent characters, use Cn
-CONSTANT: categories
- { "Cn"
- "Lu" "Ll" "Lt" "Lm" "Lo"
- "Mn" "Mc" "Me"
- "Nd" "Nl" "No"
- "Pc" "Pd" "Ps" "Pe" "Pi" "Pf" "Po"
- "Sm" "Sc" "Sk" "So"
- "Zs" "Zl" "Zp"
- "Cc" "Cf" "Cs" "Co" }
-
-MEMO: categories-map ( -- hashtable )
- categories <enum> [ swap ] H{ } assoc-map-as ;
-
-CONSTANT: num-chars HEX: 2FA1E
-
! the maximum unicode char in the first 3 planes
: ?set-nth ( val index seq -- )