From ed9c49eee31abeaf966664327b14281f8f4f777e Mon Sep 17 00:00:00 2001
From: John Benediktsson <mrjbq7@gmail.com>
Date: Fri, 5 Aug 2022 16:15:13 -0700
Subject: [PATCH] xmode: update for recent jEdit mode changes

---
 basis/xmode/loader/loader.factor        |   2 +-
 basis/xmode/loader/syntax/syntax.factor |  38 ++++++---
 basis/xmode/marker/marker-tests.factor  |  12 +--
 basis/xmode/marker/marker.factor        | 102 +++++++++++++++++++-----
 basis/xmode/rules/rules.factor          |  13 ++-
 basis/xmode/utilities/utilities.factor  |   4 +-
 6 files changed, 123 insertions(+), 48 deletions(-)
diff --git a/basis/xmode/loader/loader.factor b/basis/xmode/loader/loader.factor
index 6bd230eb7f..fa01ca1796 100644
--- a/basis/xmode/loader/loader.factor
+++ b/basis/xmode/loader/loader.factor
@@ -27,7 +27,7 @@ RULE: SPAN span-rule parse-rule-tag
     shared-tag-attrs delegate-attr match-type-attr span-attrs parse-begin/end-tags init-span-tag ;
 
 RULE: SPAN_REGEXP span-rule parse-rule-tag
-    shared-tag-attrs delegate-attr match-type-attr span-attrs regexp-attr parse-begin/end-tags init-span-tag ;
+    shared-tag-attrs delegate-attr match-type-attr span-attrs regexp-attr parse-regexp-begin/end-tags init-span-tag ;
 
 RULE: EOL_SPAN eol-span-rule parse-rule-tag
     shared-tag-attrs delegate-attr match-type-attr literal-start init-eol-span-tag ;
diff --git a/basis/xmode/loader/syntax/syntax.factor b/basis/xmode/loader/syntax/syntax.factor
index b2e91a77fa..e626d230da 100644
--- a/basis/xmode/loader/syntax/syntax.factor
+++ b/basis/xmode/loader/syntax/syntax.factor
@@ -36,8 +36,7 @@ SYNTAX: RULE:
     [ "NAME" attr ] [ "VALUE" attr ] bi ;
 
 : parse-props-tag ( tag -- assoc )
-    children-tags
-    [ parse-prop-tag ] H{ } map>assoc ;
+    children-tags [ parse-prop-tag ] H{ } map>assoc ;
 
 : position-attrs ( tag -- at-line-start? at-whitespace-end? at-word-start? )
     ! XXX Wrong logic!
@@ -63,16 +62,25 @@ SYNTAX: RULE:
 : delegate-attr ( -- )
     { "DELEGATE" f delegate<< } , ;
 
+! XXX: check HASH_CHAR for full prefix, not just first character
+
+: char<< ( value object -- )
+    [ ?first ] dip chars<< ;
+
 : regexp-attr ( -- )
-    { "HASH_CHAR" f chars<< } , ;
+    { "HASH_CHAR" f char<< } ,
+    { "HASH_CHARS" f chars<< } , ;
 
 : match-type-attr ( -- )
     { "MATCH_TYPE" string>match-type match-token<< } , ;
 
+: string>escape ( str -- escape/f )
+    [ f ] [ <escape-rule> ] if-empty ;
+
 : span-attrs ( -- )
     { "NO_LINE_BREAK" string>boolean no-line-break?<< } ,
     { "NO_WORD_BREAK" string>boolean no-word-break?<< } ,
-    { "NO_ESCAPE" string>boolean no-escape?<< } , ;
+    { "ESCAPE" string>escape escape-rule<< } , ;
 
 : literal-start ( -- )
     [ parse-literal-matcher >>start drop ] , ;
@@ -83,22 +91,30 @@ SYNTAX: RULE:
 : literal-end ( -- )
     [ parse-literal-matcher >>end drop ] , ;
 
-! SPAN's children
 TAGS: parse-begin/end-tag ( rule tag -- )
 
 TAG: BEGIN parse-begin/end-tag
-    ! XXX
     parse-literal-matcher >>start drop ;
 
 TAG: END parse-begin/end-tag
-    ! XXX
     parse-literal-matcher >>end drop ;
 
 : parse-begin/end-tags ( -- )
-    [
-        ! XXX: handle position attrs on span tag itself
-        children-tags [ parse-begin/end-tag ] with each
-    ] , ;
+    [ children-tags [ parse-begin/end-tag ] with each ] , ;
+
+TAGS: parse-regexp-begin/end-tag ( rule tag -- )
+
+TAG: BEGIN parse-regexp-begin/end-tag
+    parse-regexp-matcher >>start drop ;
+
+! XXX: END AT_WHITESPACE_END="TRUE"?
+
+TAG: END parse-regexp-begin/end-tag
+    dup "REGEXP" attr string>boolean
+    [ parse-regexp-matcher ] [ parse-literal-matcher ] if >>end drop ;
+
+: parse-regexp-begin/end-tags ( -- )
+    [ children-tags [ parse-regexp-begin/end-tag ] with each ] , ;
 
 : init-span-tag ( -- ) [ drop init-span ] , ;
 
diff --git a/basis/xmode/marker/marker-tests.factor b/basis/xmode/marker/marker-tests.factor
index b6f6676016..6989e7fca5 100644
--- a/basis/xmode/marker/marker-tests.factor
+++ b/basis/xmode/marker/marker-tests.factor
@@ -37,13 +37,13 @@ xmode.marker tools.test kernel ;
 
 {
     {
-        T{ token f "//" COMMENT2 }
-        T{ token f " " COMMENT2 }
-        T{ token f "hello" COMMENT2 }
-        T{ token f " " COMMENT2 }
-        T{ token f "world" COMMENT2 }
+        T{ token f "#" COMMENT1 }
+        T{ token f " " COMMENT1 }
+        T{ token f "hello" COMMENT1 }
+        T{ token f " " COMMENT1 }
+        T{ token f "world" COMMENT1 }
     }
-} [ f "// hello world" "java" load-mode tokenize-line nip ] unit-test
+} [ f "# hello world" "python" load-mode tokenize-line nip ] unit-test
 
 
 {
diff --git a/basis/xmode/marker/marker.factor b/basis/xmode/marker/marker.factor
index 7e7d0b5ccf..d296238208 100644
--- a/basis/xmode/marker/marker.factor
+++ b/basis/xmode/marker/marker.factor
@@ -1,9 +1,11 @@
 ! Copyright (C) 2008 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
+
 USING: accessors ascii assocs combinators
-combinators.short-circuit kernel make math namespaces regexp
-sequences strings xmode.marker.state xmode.rules xmode.tokens
-xmode.utilities ;
+combinators.short-circuit formatting kernel make math namespaces
+regexp regexp.parser sequences splitting strings
+xmode.marker.state xmode.rules xmode.tokens xmode.utilities ;
+
 IN: xmode.marker
 
 ! Next two words copied from parser-combinators
@@ -72,22 +74,81 @@ M: rule match-position drop position get ;
 : rest-of-line ( -- str )
     line get position get tail-slice ;
 
+: match-start ( string regexp -- slice/f )
+    first-match dup [ dup from>> 0 = [ drop f ] unless ] when ;
+
 GENERIC: text-matches? ( string text -- match-count/f )
 
 M: f text-matches?
     2drop f ;
 
 M: string-matcher text-matches?
-    [
-        [ string>> ] [ ignore-case?>> ] bi string-head?
-    ] keep string>> length and ;
+    [ string>> ] [ ignore-case?>> ] bi
+    [ string-head? ] keepd length and ;
 
 M: regexp text-matches?
-    [ >string ] dip first-match dup [ to>> ] when ;
+    [ >string ] dip match-start dup [ to>> ] when ;
+
+<PRIVATE
+
+! XXX: Terrible inefficient regexp match group support
+
+: #match-groups ( regexp -- n/f )
+    raw>> [ CHAR: ( = ] count [ f ] when-zero ;
+
+: nth-index ( n obj seq -- i )
+    [ = dup [ drop 1 - dup 0 < ] when ] with find drop nip ;
+
+: match-group-regexp ( regexp n -- skip-regexp match-regexp )
+    [ [ options>> options>string ] [ raw>> ] bi ] dip
+    CHAR: ( pick nth-index cut CHAR: ) over index 1 + head
+    rot '[ _ <optioned-regexp> ] bi@ ;
+
+: skip-first-match ( match regexp -- tailseq )
+    first-match [ seq>> ] [ to>> ] bi tail ;
+
+: nth-match ( match regexp n -- slice/f )
+    match-group-regexp [ skip-first-match ] [ first-match ] bi* ;
+
+: update-match-group ( str match regexp n -- str' )
+    [ nth-match ] [ CHAR: 1 + "$%c" sprintf ] bi swap replace ;
+
+: update-match-groups ( str match regexp -- str' )
+    [ >string ] dip
+    dup #match-groups [ update-match-group ] 2with each-integer ;
+
+GENERIC: fixup-end ( match regexp end -- end' )
 
-: rule-start-matches? ( rule -- match-count/f )
-    [ start>> dup ] keep can-match-here? [
-        rest-of-line swap text>> text-matches?
+M: string-matcher fixup-end
+    [ string>> -rot update-match-groups ]
+    [ ignore-case?>> ] bi <string-matcher> ;
+
+M: regexp fixup-end
+    [ raw>> [ -rot update-match-groups ] keep swap ]
+    [ options>> options>string ] bi <optioned-regexp> {
+        [ parse-tree>> ] [ options>> ] [ dfa>> ] [ next-match>> ]
+    } cleave regexp boa ;
+
+: fixup-end? ( text -- ? )
+    { [ regexp? ] [ #match-groups ] } 1&& ;
+
+: fixup-end/text-matches? ( string regexp rule -- match-count/f )
+    [ >string ] 2dip [ [ match-start dup ] keep ] dip pick [
+        end>> [ [ fixup-end ] change-text drop ] [ 2drop ] if*
+    ] [
+        3drop
+    ] if dup [ to>> ] when ;
+
+PRIVATE>
+
+:: rule-start-matches? ( rule -- match-count/f )
+    rule start>> dup rule can-match-here? [
+        rest-of-line swap text>>
+        dup fixup-end? [
+            rule fixup-end/text-matches?
+        ] [
+            text-matches?
+        ] if
     ] [
         drop f
     ] if ;
@@ -128,17 +189,16 @@ GENERIC: handle-rule-end ( match-count rule -- )
     ] ?if ;
 
 : check-escape-rule ( rule -- ? )
-    no-escape?>> [ f ] [
-        find-escape-rule dup [
-            dup rule-start-matches? [
-                swap handle-rule-start
-                delegate-end-escaped? toggle
-                t
-            ] [
-                drop f
-            ] if*
-        ] when
-    ] if ;
+    escape-rule>> [ find-escape-rule ] unless*
+    dup [
+        dup rule-start-matches? [
+            swap handle-rule-start
+            delegate-end-escaped? toggle
+            t
+        ] [
+            drop f
+        ] if*
+    ] when ;
 
 : check-every-rule ( -- ? )
     current-char current-rule-set get-rules
diff --git a/basis/xmode/rules/rules.factor b/basis/xmode/rules/rules.factor
index 084abb5084..27171b794b 100644
--- a/basis/xmode/rules/rules.factor
+++ b/basis/xmode/rules/rules.factor
@@ -38,6 +38,7 @@ MEMO: standard-rule-set ( id -- ruleset )
     imports>> push ;
 
 : inverted-index ( hashes key index -- )
+    [ [ { f } ] when-empty ] 2dip
     [ swapd push-at ] 2curry each ;
 
 : ?push-all ( seq1 seq2 -- seq1+seq2 )
@@ -60,13 +61,13 @@ C: <matcher> matcher
 TUPLE: rule
 no-line-break?
 no-word-break?
-no-escape?
 start
 end
 match-token
 body-token
 delegate
 chars
+escape-rule
 ;
 
 TUPLE: seq-rule < rule ;
@@ -111,10 +112,6 @@ M: regexp text-hash-char drop f ;
     [ dup rule-chars* >upper swap ] dip rules>> inverted-index ;
 
 : add-escape-rule ( string ruleset -- )
-    over [
-        [ <escape-rule> ] dip
-        2dup escape-rule<<
-        add-rule
-    ] [
-        2drop
-    ] if ;
+    '[
+        <escape-rule> _ [ escape-rule<< ] [ add-rule ] 2bi
+    ] unless-empty ;
diff --git a/basis/xmode/utilities/utilities.factor b/basis/xmode/utilities/utilities.factor
index 9ded828f92..2d5e3c7f40 100644
--- a/basis/xmode/utilities/utilities.factor
+++ b/basis/xmode/utilities/utilities.factor
@@ -1,5 +1,5 @@
 USING: combinators kernel namespaces quotations regexp sequences
-xml.data xml.traversal ;
+splitting xml.data xml.traversal ;
 IN: xmode.utilities
 
 : implies ( x y -- z ) [ not ] dip or ; inline
@@ -32,4 +32,6 @@ MACRO: (init-from-tag) ( specs -- quot )
     over [ (init-from-tag) ] dip ; inline
 
 : <?insensitive-regexp> ( string ? -- regexp )
+    ! handle Java style case-insensitive flags
+    "(?i)" pick subseq-start [ drop "(?i)" "" replace t ] when
     "i" "" ? <optioned-regexp> ;
-- 
2.34.1