From ed9c49eee31abeaf966664327b14281f8f4f777e Mon Sep 17 00:00:00 2001 From: John Benediktsson Date: Fri, 5 Aug 2022 16:15:13 -0700 Subject: [PATCH] xmode: update for recent jEdit mode changes --- basis/xmode/loader/loader.factor | 2 +- basis/xmode/loader/syntax/syntax.factor | 38 ++++++--- basis/xmode/marker/marker-tests.factor | 12 +-- basis/xmode/marker/marker.factor | 102 +++++++++++++++++++----- basis/xmode/rules/rules.factor | 13 ++- basis/xmode/utilities/utilities.factor | 4 +- 6 files changed, 123 insertions(+), 48 deletions(-) diff --git a/basis/xmode/loader/loader.factor b/basis/xmode/loader/loader.factor index 6bd230eb7f..fa01ca1796 100644 --- a/basis/xmode/loader/loader.factor +++ b/basis/xmode/loader/loader.factor @@ -27,7 +27,7 @@ RULE: SPAN span-rule parse-rule-tag shared-tag-attrs delegate-attr match-type-attr span-attrs parse-begin/end-tags init-span-tag ; RULE: SPAN_REGEXP span-rule parse-rule-tag - shared-tag-attrs delegate-attr match-type-attr span-attrs regexp-attr parse-begin/end-tags init-span-tag ; + shared-tag-attrs delegate-attr match-type-attr span-attrs regexp-attr parse-regexp-begin/end-tags init-span-tag ; RULE: EOL_SPAN eol-span-rule parse-rule-tag shared-tag-attrs delegate-attr match-type-attr literal-start init-eol-span-tag ; diff --git a/basis/xmode/loader/syntax/syntax.factor b/basis/xmode/loader/syntax/syntax.factor index b2e91a77fa..e626d230da 100644 --- a/basis/xmode/loader/syntax/syntax.factor +++ b/basis/xmode/loader/syntax/syntax.factor @@ -36,8 +36,7 @@ SYNTAX: RULE: [ "NAME" attr ] [ "VALUE" attr ] bi ; : parse-props-tag ( tag -- assoc ) - children-tags - [ parse-prop-tag ] H{ } map>assoc ; + children-tags [ parse-prop-tag ] H{ } map>assoc ; : position-attrs ( tag -- at-line-start? at-whitespace-end? at-word-start? ) ! XXX Wrong logic! @@ -63,16 +62,25 @@ SYNTAX: RULE: : delegate-attr ( -- ) { "DELEGATE" f delegate<< } , ; +! XXX: check HASH_CHAR for full prefix, not just first character + +: char<< ( value object -- ) + [ ?first ] dip chars<< ; + : regexp-attr ( -- ) - { "HASH_CHAR" f chars<< } , ; + { "HASH_CHAR" f char<< } , + { "HASH_CHARS" f chars<< } , ; : match-type-attr ( -- ) { "MATCH_TYPE" string>match-type match-token<< } , ; +: string>escape ( str -- escape/f ) + [ f ] [ ] if-empty ; + : span-attrs ( -- ) { "NO_LINE_BREAK" string>boolean no-line-break?<< } , { "NO_WORD_BREAK" string>boolean no-word-break?<< } , - { "NO_ESCAPE" string>boolean no-escape?<< } , ; + { "ESCAPE" string>escape escape-rule<< } , ; : literal-start ( -- ) [ parse-literal-matcher >>start drop ] , ; @@ -83,22 +91,30 @@ SYNTAX: RULE: : literal-end ( -- ) [ parse-literal-matcher >>end drop ] , ; -! SPAN's children TAGS: parse-begin/end-tag ( rule tag -- ) TAG: BEGIN parse-begin/end-tag - ! XXX parse-literal-matcher >>start drop ; TAG: END parse-begin/end-tag - ! XXX parse-literal-matcher >>end drop ; : parse-begin/end-tags ( -- ) - [ - ! XXX: handle position attrs on span tag itself - children-tags [ parse-begin/end-tag ] with each - ] , ; + [ children-tags [ parse-begin/end-tag ] with each ] , ; + +TAGS: parse-regexp-begin/end-tag ( rule tag -- ) + +TAG: BEGIN parse-regexp-begin/end-tag + parse-regexp-matcher >>start drop ; + +! XXX: END AT_WHITESPACE_END="TRUE"? + +TAG: END parse-regexp-begin/end-tag + dup "REGEXP" attr string>boolean + [ parse-regexp-matcher ] [ parse-literal-matcher ] if >>end drop ; + +: parse-regexp-begin/end-tags ( -- ) + [ children-tags [ parse-regexp-begin/end-tag ] with each ] , ; : init-span-tag ( -- ) [ drop init-span ] , ; diff --git a/basis/xmode/marker/marker-tests.factor b/basis/xmode/marker/marker-tests.factor index b6f6676016..6989e7fca5 100644 --- a/basis/xmode/marker/marker-tests.factor +++ b/basis/xmode/marker/marker-tests.factor @@ -37,13 +37,13 @@ xmode.marker tools.test kernel ; { { - T{ token f "//" COMMENT2 } - T{ token f " " COMMENT2 } - T{ token f "hello" COMMENT2 } - T{ token f " " COMMENT2 } - T{ token f "world" COMMENT2 } + T{ token f "#" COMMENT1 } + T{ token f " " COMMENT1 } + T{ token f "hello" COMMENT1 } + T{ token f " " COMMENT1 } + T{ token f "world" COMMENT1 } } -} [ f "// hello world" "java" load-mode tokenize-line nip ] unit-test +} [ f "# hello world" "python" load-mode tokenize-line nip ] unit-test { diff --git a/basis/xmode/marker/marker.factor b/basis/xmode/marker/marker.factor index 7e7d0b5ccf..d296238208 100644 --- a/basis/xmode/marker/marker.factor +++ b/basis/xmode/marker/marker.factor @@ -1,9 +1,11 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. + USING: accessors ascii assocs combinators -combinators.short-circuit kernel make math namespaces regexp -sequences strings xmode.marker.state xmode.rules xmode.tokens -xmode.utilities ; +combinators.short-circuit formatting kernel make math namespaces +regexp regexp.parser sequences splitting strings +xmode.marker.state xmode.rules xmode.tokens xmode.utilities ; + IN: xmode.marker ! Next two words copied from parser-combinators @@ -72,22 +74,81 @@ M: rule match-position drop position get ; : rest-of-line ( -- str ) line get position get tail-slice ; +: match-start ( string regexp -- slice/f ) + first-match dup [ dup from>> 0 = [ drop f ] unless ] when ; + GENERIC: text-matches? ( string text -- match-count/f ) M: f text-matches? 2drop f ; M: string-matcher text-matches? - [ - [ string>> ] [ ignore-case?>> ] bi string-head? - ] keep string>> length and ; + [ string>> ] [ ignore-case?>> ] bi + [ string-head? ] keepd length and ; M: regexp text-matches? - [ >string ] dip first-match dup [ to>> ] when ; + [ >string ] dip match-start dup [ to>> ] when ; + +> [ CHAR: ( = ] count [ f ] when-zero ; + +: nth-index ( n obj seq -- i ) + [ = dup [ drop 1 - dup 0 < ] when ] with find drop nip ; + +: match-group-regexp ( regexp n -- skip-regexp match-regexp ) + [ [ options>> options>string ] [ raw>> ] bi ] dip + CHAR: ( pick nth-index cut CHAR: ) over index 1 + head + rot '[ _ ] bi@ ; + +: skip-first-match ( match regexp -- tailseq ) + first-match [ seq>> ] [ to>> ] bi tail ; + +: nth-match ( match regexp n -- slice/f ) + match-group-regexp [ skip-first-match ] [ first-match ] bi* ; + +: update-match-group ( str match regexp n -- str' ) + [ nth-match ] [ CHAR: 1 + "$%c" sprintf ] bi swap replace ; + +: update-match-groups ( str match regexp -- str' ) + [ >string ] dip + dup #match-groups [ update-match-group ] 2with each-integer ; + +GENERIC: fixup-end ( match regexp end -- end' ) -: rule-start-matches? ( rule -- match-count/f ) - [ start>> dup ] keep can-match-here? [ - rest-of-line swap text>> text-matches? +M: string-matcher fixup-end + [ string>> -rot update-match-groups ] + [ ignore-case?>> ] bi ; + +M: regexp fixup-end + [ raw>> [ -rot update-match-groups ] keep swap ] + [ options>> options>string ] bi { + [ parse-tree>> ] [ options>> ] [ dfa>> ] [ next-match>> ] + } cleave regexp boa ; + +: fixup-end? ( text -- ? ) + { [ regexp? ] [ #match-groups ] } 1&& ; + +: fixup-end/text-matches? ( string regexp rule -- match-count/f ) + [ >string ] 2dip [ [ match-start dup ] keep ] dip pick [ + end>> [ [ fixup-end ] change-text drop ] [ 2drop ] if* + ] [ + 3drop + ] if dup [ to>> ] when ; + +PRIVATE> + +:: rule-start-matches? ( rule -- match-count/f ) + rule start>> dup rule can-match-here? [ + rest-of-line swap text>> + dup fixup-end? [ + rule fixup-end/text-matches? + ] [ + text-matches? + ] if ] [ drop f ] if ; @@ -128,17 +189,16 @@ GENERIC: handle-rule-end ( match-count rule -- ) ] ?if ; : check-escape-rule ( rule -- ? ) - no-escape?>> [ f ] [ - find-escape-rule dup [ - dup rule-start-matches? [ - swap handle-rule-start - delegate-end-escaped? toggle - t - ] [ - drop f - ] if* - ] when - ] if ; + escape-rule>> [ find-escape-rule ] unless* + dup [ + dup rule-start-matches? [ + swap handle-rule-start + delegate-end-escaped? toggle + t + ] [ + drop f + ] if* + ] when ; : check-every-rule ( -- ? ) current-char current-rule-set get-rules diff --git a/basis/xmode/rules/rules.factor b/basis/xmode/rules/rules.factor index 084abb5084..27171b794b 100644 --- a/basis/xmode/rules/rules.factor +++ b/basis/xmode/rules/rules.factor @@ -38,6 +38,7 @@ MEMO: standard-rule-set ( id -- ruleset ) imports>> push ; : inverted-index ( hashes key index -- ) + [ [ { f } ] when-empty ] 2dip [ swapd push-at ] 2curry each ; : ?push-all ( seq1 seq2 -- seq1+seq2 ) @@ -60,13 +61,13 @@ C: matcher TUPLE: rule no-line-break? no-word-break? -no-escape? start end match-token body-token delegate chars +escape-rule ; TUPLE: seq-rule < rule ; @@ -111,10 +112,6 @@ M: regexp text-hash-char drop f ; [ dup rule-chars* >upper swap ] dip rules>> inverted-index ; : add-escape-rule ( string ruleset -- ) - over [ - [ ] dip - 2dup escape-rule<< - add-rule - ] [ - 2drop - ] if ; + '[ + _ [ escape-rule<< ] [ add-rule ] 2bi + ] unless-empty ; diff --git a/basis/xmode/utilities/utilities.factor b/basis/xmode/utilities/utilities.factor index 9ded828f92..2d5e3c7f40 100644 --- a/basis/xmode/utilities/utilities.factor +++ b/basis/xmode/utilities/utilities.factor @@ -1,5 +1,5 @@ USING: combinators kernel namespaces quotations regexp sequences -xml.data xml.traversal ; +splitting xml.data xml.traversal ; IN: xmode.utilities : implies ( x y -- z ) [ not ] dip or ; inline @@ -32,4 +32,6 @@ MACRO: (init-from-tag) ( specs -- quot ) over [ (init-from-tag) ] dip ; inline : ( string ? -- regexp ) + ! handle Java style case-insensitive flags + "(?i)" pick subseq-start [ drop "(?i)" "" replace t ] when "i" "" ? ; -- 2.34.1