1 ! Copyright (C) 2008 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors assocs kernel regexp sequences unicode
7 TUPLE: string-matcher string ignore-case? ;
9 C: <string-matcher> string-matcher
11 ! Based on org.gjt.sp.jedit.syntax.ParserRuleSet
28 : <rule-set> ( -- ruleset )
32 V{ } clone >>imports ;
34 MEMO: standard-rule-set ( id -- ruleset )
35 <rule-set> swap >>default ;
37 : import-rule-set ( import ruleset -- )
40 : inverted-index ( hashes key index -- )
41 [ [ { f } ] when-empty ] 2dip
42 [ swapd push-at ] 2curry each ;
44 : rule-set-no-word-sep* ( ruleset -- str )
45 [ no-word-sep>> ] [ keywords>> ] bi
46 dup [ keyword-map-no-word-sep* ] when
50 TUPLE: matcher text at-line-start? at-whitespace-end? at-word-start? ;
54 ! Based on org.gjt.sp.jedit.syntax.ParserRule
67 TUPLE: seq-rule < rule ;
69 TUPLE: span-rule < rule ;
71 TUPLE: eol-span-rule < rule ;
73 : init-span ( rule -- )
74 dup delegate>> [ drop ] [
75 dup body-token>> standard-rule-set
79 : init-eol-span ( rule -- )
81 t >>no-line-break? drop ;
83 TUPLE: mark-following-rule < rule ;
85 TUPLE: mark-previous-rule < rule ;
87 TUPLE: escape-rule < rule ;
89 : <escape-rule> ( string -- rule )
90 f <string-matcher> f f f <matcher>
91 escape-rule new swap >>start ;
93 GENERIC: text-hash-char ( text -- ch )
97 M: string-matcher text-hash-char string>> first ;
99 M: regexp text-hash-char drop f ;
101 : rule-chars* ( rule -- string )
102 [ chars>> ] [ start>> ] bi text>>
103 text-hash-char [ suffix ] when* ;
105 : add-rule ( rule ruleset -- )
106 [ dup rule-chars* >upper swap ] dip rules>> inverted-index ;
108 : add-escape-rule ( string ruleset -- )
110 <escape-rule> _ [ escape-rule<< ] [ add-rule ] 2bi