shared-tag-attrs delegate-attr match-type-attr span-attrs parse-begin/end-tags init-span-tag ;
RULE: SPAN_REGEXP span-rule parse-rule-tag
- shared-tag-attrs delegate-attr match-type-attr span-attrs regexp-attr parse-begin/end-tags init-span-tag ;
+ shared-tag-attrs delegate-attr match-type-attr span-attrs regexp-attr parse-regexp-begin/end-tags init-span-tag ;
RULE: EOL_SPAN eol-span-rule parse-rule-tag
shared-tag-attrs delegate-attr match-type-attr literal-start init-eol-span-tag ;
[ "NAME" attr ] [ "VALUE" attr ] bi ;
: parse-props-tag ( tag -- assoc )
- children-tags
- [ parse-prop-tag ] H{ } map>assoc ;
+ children-tags [ parse-prop-tag ] H{ } map>assoc ;
: position-attrs ( tag -- at-line-start? at-whitespace-end? at-word-start? )
! XXX Wrong logic!
: delegate-attr ( -- )
{ "DELEGATE" f delegate<< } , ;
+! XXX: check HASH_CHAR for full prefix, not just first character
+
+: char<< ( value object -- )
+ [ ?first ] dip chars<< ;
+
: regexp-attr ( -- )
- { "HASH_CHAR" f chars<< } , ;
+ { "HASH_CHAR" f char<< } ,
+ { "HASH_CHARS" f chars<< } , ;
: match-type-attr ( -- )
{ "MATCH_TYPE" string>match-type match-token<< } , ;
+: string>escape ( str -- escape/f )
+ [ f ] [ <escape-rule> ] if-empty ;
+
: span-attrs ( -- )
{ "NO_LINE_BREAK" string>boolean no-line-break?<< } ,
{ "NO_WORD_BREAK" string>boolean no-word-break?<< } ,
- { "NO_ESCAPE" string>boolean no-escape?<< } , ;
+ { "ESCAPE" string>escape escape-rule<< } , ;
: literal-start ( -- )
[ parse-literal-matcher >>start drop ] , ;
: literal-end ( -- )
[ parse-literal-matcher >>end drop ] , ;
-! SPAN's children
TAGS: parse-begin/end-tag ( rule tag -- )
TAG: BEGIN parse-begin/end-tag
- ! XXX
parse-literal-matcher >>start drop ;
TAG: END parse-begin/end-tag
- ! XXX
parse-literal-matcher >>end drop ;
: parse-begin/end-tags ( -- )
- [
- ! XXX: handle position attrs on span tag itself
- children-tags [ parse-begin/end-tag ] with each
- ] , ;
+ [ children-tags [ parse-begin/end-tag ] with each ] , ;
+
+TAGS: parse-regexp-begin/end-tag ( rule tag -- )
+
+TAG: BEGIN parse-regexp-begin/end-tag
+ parse-regexp-matcher >>start drop ;
+
+! XXX: END AT_WHITESPACE_END="TRUE"?
+
+TAG: END parse-regexp-begin/end-tag
+ dup "REGEXP" attr string>boolean
+ [ parse-regexp-matcher ] [ parse-literal-matcher ] if >>end drop ;
+
+: parse-regexp-begin/end-tags ( -- )
+ [ children-tags [ parse-regexp-begin/end-tag ] with each ] , ;
: init-span-tag ( -- ) [ drop init-span ] , ;
{
{
- T{ token f "//" COMMENT2 }
- T{ token f " " COMMENT2 }
- T{ token f "hello" COMMENT2 }
- T{ token f " " COMMENT2 }
- T{ token f "world" COMMENT2 }
+ T{ token f "#" COMMENT1 }
+ T{ token f " " COMMENT1 }
+ T{ token f "hello" COMMENT1 }
+ T{ token f " " COMMENT1 }
+ T{ token f "world" COMMENT1 }
}
-} [ f "// hello world" "java" load-mode tokenize-line nip ] unit-test
+} [ f "# hello world" "python" load-mode tokenize-line nip ] unit-test
{
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
+
USING: accessors ascii assocs combinators
-combinators.short-circuit kernel make math namespaces regexp
-sequences strings xmode.marker.state xmode.rules xmode.tokens
-xmode.utilities ;
+combinators.short-circuit formatting kernel make math namespaces
+regexp regexp.parser sequences splitting strings
+xmode.marker.state xmode.rules xmode.tokens xmode.utilities ;
+
IN: xmode.marker
! Next two words copied from parser-combinators
: rest-of-line ( -- str )
line get position get tail-slice ;
+: match-start ( string regexp -- slice/f )
+ first-match dup [ dup from>> 0 = [ drop f ] unless ] when ;
+
GENERIC: text-matches? ( string text -- match-count/f )
M: f text-matches?
2drop f ;
M: string-matcher text-matches?
- [
- [ string>> ] [ ignore-case?>> ] bi string-head?
- ] keep string>> length and ;
+ [ string>> ] [ ignore-case?>> ] bi
+ [ string-head? ] keepd length and ;
M: regexp text-matches?
- [ >string ] dip first-match dup [ to>> ] when ;
+ [ >string ] dip match-start dup [ to>> ] when ;
+
+<PRIVATE
+
+! XXX: Terrible inefficient regexp match group support
+
+: #match-groups ( regexp -- n/f )
+ raw>> [ CHAR: ( = ] count [ f ] when-zero ;
+
+: nth-index ( n obj seq -- i )
+ [ = dup [ drop 1 - dup 0 < ] when ] with find drop nip ;
+
+: match-group-regexp ( regexp n -- skip-regexp match-regexp )
+ [ [ options>> options>string ] [ raw>> ] bi ] dip
+ CHAR: ( pick nth-index cut CHAR: ) over index 1 + head
+ rot '[ _ <optioned-regexp> ] bi@ ;
+
+: skip-first-match ( match regexp -- tailseq )
+ first-match [ seq>> ] [ to>> ] bi tail ;
+
+: nth-match ( match regexp n -- slice/f )
+ match-group-regexp [ skip-first-match ] [ first-match ] bi* ;
+
+: update-match-group ( str match regexp n -- str' )
+ [ nth-match ] [ CHAR: 1 + "$%c" sprintf ] bi swap replace ;
+
+: update-match-groups ( str match regexp -- str' )
+ [ >string ] dip
+ dup #match-groups [ update-match-group ] 2with each-integer ;
+
+GENERIC: fixup-end ( match regexp end -- end' )
-: rule-start-matches? ( rule -- match-count/f )
- [ start>> dup ] keep can-match-here? [
- rest-of-line swap text>> text-matches?
+M: string-matcher fixup-end
+ [ string>> -rot update-match-groups ]
+ [ ignore-case?>> ] bi <string-matcher> ;
+
+M: regexp fixup-end
+ [ raw>> [ -rot update-match-groups ] keep swap ]
+ [ options>> options>string ] bi <optioned-regexp> {
+ [ parse-tree>> ] [ options>> ] [ dfa>> ] [ next-match>> ]
+ } cleave regexp boa ;
+
+: fixup-end? ( text -- ? )
+ { [ regexp? ] [ #match-groups ] } 1&& ;
+
+: fixup-end/text-matches? ( string regexp rule -- match-count/f )
+ [ >string ] 2dip [ [ match-start dup ] keep ] dip pick [
+ end>> [ [ fixup-end ] change-text drop ] [ 2drop ] if*
+ ] [
+ 3drop
+ ] if dup [ to>> ] when ;
+
+PRIVATE>
+
+:: rule-start-matches? ( rule -- match-count/f )
+ rule start>> dup rule can-match-here? [
+ rest-of-line swap text>>
+ dup fixup-end? [
+ rule fixup-end/text-matches?
+ ] [
+ text-matches?
+ ] if
] [
drop f
] if ;
] ?if ;
: check-escape-rule ( rule -- ? )
- no-escape?>> [ f ] [
- find-escape-rule dup [
- dup rule-start-matches? [
- swap handle-rule-start
- delegate-end-escaped? toggle
- t
- ] [
- drop f
- ] if*
- ] when
- ] if ;
+ escape-rule>> [ find-escape-rule ] unless*
+ dup [
+ dup rule-start-matches? [
+ swap handle-rule-start
+ delegate-end-escaped? toggle
+ t
+ ] [
+ drop f
+ ] if*
+ ] when ;
: check-every-rule ( -- ? )
current-char current-rule-set get-rules
imports>> push ;
: inverted-index ( hashes key index -- )
+ [ [ { f } ] when-empty ] 2dip
[ swapd push-at ] 2curry each ;
: ?push-all ( seq1 seq2 -- seq1+seq2 )
TUPLE: rule
no-line-break?
no-word-break?
-no-escape?
start
end
match-token
body-token
delegate
chars
+escape-rule
;
TUPLE: seq-rule < rule ;
[ dup rule-chars* >upper swap ] dip rules>> inverted-index ;
: add-escape-rule ( string ruleset -- )
- over [
- [ <escape-rule> ] dip
- 2dup escape-rule<<
- add-rule
- ] [
- 2drop
- ] if ;
+ '[
+ <escape-rule> _ [ escape-rule<< ] [ add-rule ] 2bi
+ ] unless-empty ;
USING: combinators kernel namespaces quotations regexp sequences
-xml.data xml.traversal ;
+splitting xml.data xml.traversal ;
IN: xmode.utilities
: implies ( x y -- z ) [ not ] dip or ; inline
over [ (init-from-tag) ] dip ; inline
: <?insensitive-regexp> ( string ? -- regexp )
+ ! handle Java style case-insensitive flags
+ "(?i)" pick subseq-start [ drop "(?i)" "" replace t ] when
"i" "" ? <optioned-regexp> ;