xmode: update for recent jEdit mode changes

author John Benediktsson <mrjbq7@gmail.com>

Fri, 5 Aug 2022 23:15:13 +0000 (16:15 -0700)

committer John Benediktsson <mrjbq7@gmail.com>

Fri, 5 Aug 2022 23:15:13 +0000 (16:15 -0700)
author John Benediktsson <mrjbq7@gmail.com>
Fri, 5 Aug 2022 23:15:13 +0000 (16:15 -0700)
committer John Benediktsson <mrjbq7@gmail.com>
Fri, 5 Aug 2022 23:15:13 +0000 (16:15 -0700)
diff --git a/basis/xmode/loader/loader.factor b/basis/xmode/loader/loader.factor

index 6bd230eb7ffcb4e2de7c088a7a4063480d904074..fa01ca1796927738bac508c792cfb4f85bdb0280 100644 (file)
--- a/basis/xmode/loader/loader.factor
+++ b/basis/xmode/loader/loader.factor
@@ -27,7 +27,7 @@ RULE: SPAN span-rule parse-rule-tag
      shared-tag-attrs delegate-attr match-type-attr span-attrs parse-begin/end-tags init-span-tag ;
  
  RULE: SPAN_REGEXP span-rule parse-rule-tag
      shared-tag-attrs delegate-attr match-type-attr span-attrs parse-begin/end-tags init-span-tag ;
  
  RULE: SPAN_REGEXP span-rule parse-rule-tag
-    shared-tag-attrs delegate-attr match-type-attr span-attrs regexp-attr parse-begin/end-tags init-span-tag ;
+    shared-tag-attrs delegate-attr match-type-attr span-attrs regexp-attr parse-regexp-begin/end-tags init-span-tag ;
  
  RULE: EOL_SPAN eol-span-rule parse-rule-tag
      shared-tag-attrs delegate-attr match-type-attr literal-start init-eol-span-tag ;
  
  RULE: EOL_SPAN eol-span-rule parse-rule-tag
      shared-tag-attrs delegate-attr match-type-attr literal-start init-eol-span-tag ;
diff --git a/basis/xmode/loader/syntax/syntax.factor b/basis/xmode/loader/syntax/syntax.factor

index b2e91a77faefe4895a5411400112bed47de5ad0a..e626d230da3a8600ecb6520c88fba9c9be2cd791 100644 (file)
--- a/basis/xmode/loader/syntax/syntax.factor
+++ b/basis/xmode/loader/syntax/syntax.factor
@@ -36,8 +36,7 @@ SYNTAX: RULE:
      [ "NAME" attr ] [ "VALUE" attr ] bi ;
  
  : parse-props-tag ( tag -- assoc )
      [ "NAME" attr ] [ "VALUE" attr ] bi ;
  
  : parse-props-tag ( tag -- assoc )
-    children-tags
-    [ parse-prop-tag ] H{ } map>assoc ;
+    children-tags [ parse-prop-tag ] H{ } map>assoc ;
  
  : position-attrs ( tag -- at-line-start? at-whitespace-end? at-word-start? )
      ! XXX Wrong logic!
  
  : position-attrs ( tag -- at-line-start? at-whitespace-end? at-word-start? )
      ! XXX Wrong logic!
@@ -63,16 +62,25 @@ SYNTAX: RULE:
  : delegate-attr ( -- )
      { "DELEGATE" f delegate<< } , ;
  
  : delegate-attr ( -- )
      { "DELEGATE" f delegate<< } , ;
  
+! XXX: check HASH_CHAR for full prefix, not just first character
+
+: char<< ( value object -- )
+    [ ?first ] dip chars<< ;
+
  : regexp-attr ( -- )
  : regexp-attr ( -- )
-    { "HASH_CHAR" f chars<< } , ;
+    { "HASH_CHAR" f char<< } ,
+    { "HASH_CHARS" f chars<< } , ;
  
  : match-type-attr ( -- )
      { "MATCH_TYPE" string>match-type match-token<< } , ;
  
  
  : match-type-attr ( -- )
      { "MATCH_TYPE" string>match-type match-token<< } , ;
  
+: string>escape ( str -- escape/f )
+    [ f ] [ <escape-rule> ] if-empty ;
+
  : span-attrs ( -- )
      { "NO_LINE_BREAK" string>boolean no-line-break?<< } ,
      { "NO_WORD_BREAK" string>boolean no-word-break?<< } ,
  : span-attrs ( -- )
      { "NO_LINE_BREAK" string>boolean no-line-break?<< } ,
      { "NO_WORD_BREAK" string>boolean no-word-break?<< } ,
-    { "NO_ESCAPE" string>boolean no-escape?<< } , ;
+    { "ESCAPE" string>escape escape-rule<< } , ;
  
  : literal-start ( -- )
      [ parse-literal-matcher >>start drop ] , ;
  
  : literal-start ( -- )
      [ parse-literal-matcher >>start drop ] , ;
@@ -83,22 +91,30 @@ SYNTAX: RULE:
  : literal-end ( -- )
      [ parse-literal-matcher >>end drop ] , ;
  
  : literal-end ( -- )
      [ parse-literal-matcher >>end drop ] , ;
  
-! SPAN's children
  TAGS: parse-begin/end-tag ( rule tag -- )
  
  TAG: BEGIN parse-begin/end-tag
  TAGS: parse-begin/end-tag ( rule tag -- )
  
  TAG: BEGIN parse-begin/end-tag
-    ! XXX
      parse-literal-matcher >>start drop ;
  
  TAG: END parse-begin/end-tag
      parse-literal-matcher >>start drop ;
  
  TAG: END parse-begin/end-tag
-    ! XXX
      parse-literal-matcher >>end drop ;
  
  : parse-begin/end-tags ( -- )
      parse-literal-matcher >>end drop ;
  
  : parse-begin/end-tags ( -- )
-    [
-        ! XXX: handle position attrs on span tag itself
-        children-tags [ parse-begin/end-tag ] with each
-    ] , ;
+    [ children-tags [ parse-begin/end-tag ] with each ] , ;
+
+TAGS: parse-regexp-begin/end-tag ( rule tag -- )
+
+TAG: BEGIN parse-regexp-begin/end-tag
+    parse-regexp-matcher >>start drop ;
+
+! XXX: END AT_WHITESPACE_END="TRUE"?
+
+TAG: END parse-regexp-begin/end-tag
+    dup "REGEXP" attr string>boolean
+    [ parse-regexp-matcher ] [ parse-literal-matcher ] if >>end drop ;
+
+: parse-regexp-begin/end-tags ( -- )
+    [ children-tags [ parse-regexp-begin/end-tag ] with each ] , ;
  
  : init-span-tag ( -- ) [ drop init-span ] , ;
  
  
  : init-span-tag ( -- ) [ drop init-span ] , ;
  
diff --git a/basis/xmode/marker/marker-tests.factor b/basis/xmode/marker/marker-tests.factor

index b6f6676016bb0736a2f1afd0dacf6bbfd212ff0a..6989e7fca59215518a28985c9f3793f2bb5562ac 100644 (file)
--- a/basis/xmode/marker/marker-tests.factor
+++ b/basis/xmode/marker/marker-tests.factor
@@ -37,13 +37,13 @@ xmode.marker tools.test kernel ;
  
  {
      {
  
  {
      {
-        T{ token f "//" COMMENT2 }
-        T{ token f " " COMMENT2 }
-        T{ token f "hello" COMMENT2 }
-        T{ token f " " COMMENT2 }
-        T{ token f "world" COMMENT2 }
+        T{ token f "#" COMMENT1 }
+        T{ token f " " COMMENT1 }
+        T{ token f "hello" COMMENT1 }
+        T{ token f " " COMMENT1 }
+        T{ token f "world" COMMENT1 }
      }
      }
-} [ f "// hello world" "java" load-mode tokenize-line nip ] unit-test
+} [ f "# hello world" "python" load-mode tokenize-line nip ] unit-test
  
  
  {
  
  
  {
diff --git a/basis/xmode/marker/marker.factor b/basis/xmode/marker/marker.factor

index 7e7d0b5ccf08a24fb46d9012a5e5696beec299d2..d29623820875d5758aaa1e940c077b9ca1c33bb1 100644 (file)
--- a/basis/xmode/marker/marker.factor
+++ b/basis/xmode/marker/marker.factor
@@ -1,9 +1,11 @@
  ! Copyright (C) 2008 Slava Pestov.
  ! See http://factorcode.org/license.txt for BSD license.
  ! Copyright (C) 2008 Slava Pestov.
  ! See http://factorcode.org/license.txt for BSD license.
+
  USING: accessors ascii assocs combinators
  USING: accessors ascii assocs combinators
-combinators.short-circuit kernel make math namespaces regexp
-sequences strings xmode.marker.state xmode.rules xmode.tokens
-xmode.utilities ;
+combinators.short-circuit formatting kernel make math namespaces
+regexp regexp.parser sequences splitting strings
+xmode.marker.state xmode.rules xmode.tokens xmode.utilities ;
+
  IN: xmode.marker
  
  ! Next two words copied from parser-combinators
  IN: xmode.marker
  
  ! Next two words copied from parser-combinators
@@ -72,22 +74,81 @@ M: rule match-position drop position get ;
  : rest-of-line ( -- str )
      line get position get tail-slice ;
  
  : rest-of-line ( -- str )
      line get position get tail-slice ;
  
+: match-start ( string regexp -- slice/f )
+    first-match dup [ dup from>> 0 = [ drop f ] unless ] when ;
+
  GENERIC: text-matches? ( string text -- match-count/f )
  
  M: f text-matches?
      2drop f ;
  
  M: string-matcher text-matches?
  GENERIC: text-matches? ( string text -- match-count/f )
  
  M: f text-matches?
      2drop f ;
  
  M: string-matcher text-matches?
-    [
-        [ string>> ] [ ignore-case?>> ] bi string-head?
-    ] keep string>> length and ;
+    [ string>> ] [ ignore-case?>> ] bi
+    [ string-head? ] keepd length and ;
  
  M: regexp text-matches?
  
  M: regexp text-matches?
-    [ >string ] dip first-match dup [ to>> ] when ;
+    [ >string ] dip match-start dup [ to>> ] when ;
+
+<PRIVATE
+
+! XXX: Terrible inefficient regexp match group support
+
+: #match-groups ( regexp -- n/f )
+    raw>> [ CHAR: ( = ] count [ f ] when-zero ;
+
+: nth-index ( n obj seq -- i )
+    [ = dup [ drop 1 - dup 0 < ] when ] with find drop nip ;
+
+: match-group-regexp ( regexp n -- skip-regexp match-regexp )
+    [ [ options>> options>string ] [ raw>> ] bi ] dip
+    CHAR: ( pick nth-index cut CHAR: ) over index 1 + head
+    rot '[ _ <optioned-regexp> ] bi@ ;
+
+: skip-first-match ( match regexp -- tailseq )
+    first-match [ seq>> ] [ to>> ] bi tail ;
+
+: nth-match ( match regexp n -- slice/f )
+    match-group-regexp [ skip-first-match ] [ first-match ] bi* ;
+
+: update-match-group ( str match regexp n -- str' )
+    [ nth-match ] [ CHAR: 1 + "$%c" sprintf ] bi swap replace ;
+
+: update-match-groups ( str match regexp -- str' )
+    [ >string ] dip
+    dup #match-groups [ update-match-group ] 2with each-integer ;
+
+GENERIC: fixup-end ( match regexp end -- end' )
  
  
-: rule-start-matches? ( rule -- match-count/f )
-    [ start>> dup ] keep can-match-here? [
-        rest-of-line swap text>> text-matches?
+M: string-matcher fixup-end
+    [ string>> -rot update-match-groups ]
+    [ ignore-case?>> ] bi <string-matcher> ;
+
+M: regexp fixup-end
+    [ raw>> [ -rot update-match-groups ] keep swap ]
+    [ options>> options>string ] bi <optioned-regexp> {
+        [ parse-tree>> ] [ options>> ] [ dfa>> ] [ next-match>> ]
+    } cleave regexp boa ;
+
+: fixup-end? ( text -- ? )
+    { [ regexp? ] [ #match-groups ] } 1&& ;
+
+: fixup-end/text-matches? ( string regexp rule -- match-count/f )
+    [ >string ] 2dip [ [ match-start dup ] keep ] dip pick [
+        end>> [ [ fixup-end ] change-text drop ] [ 2drop ] if*
+    ] [
+        3drop
+    ] if dup [ to>> ] when ;
+
+PRIVATE>
+
+:: rule-start-matches? ( rule -- match-count/f )
+    rule start>> dup rule can-match-here? [
+        rest-of-line swap text>>
+        dup fixup-end? [
+            rule fixup-end/text-matches?
+        ] [
+            text-matches?
+        ] if
      ] [
          drop f
      ] if ;
      ] [
          drop f
      ] if ;
@@ -128,17 +189,16 @@ GENERIC: handle-rule-end ( match-count rule -- )
      ] ?if ;
  
  : check-escape-rule ( rule -- ? )
      ] ?if ;
  
  : check-escape-rule ( rule -- ? )
-    no-escape?>> [ f ] [
-        find-escape-rule dup [
-            dup rule-start-matches? [
-                swap handle-rule-start
-                delegate-end-escaped? toggle
-                t
-            ] [
-                drop f
-            ] if*
-        ] when
-    ] if ;
+    escape-rule>> [ find-escape-rule ] unless*
+    dup [
+        dup rule-start-matches? [
+            swap handle-rule-start
+            delegate-end-escaped? toggle
+            t
+        ] [
+            drop f
+        ] if*
+    ] when ;
  
  : check-every-rule ( -- ? )
      current-char current-rule-set get-rules
  
  : check-every-rule ( -- ? )
      current-char current-rule-set get-rules
diff --git a/basis/xmode/rules/rules.factor b/basis/xmode/rules/rules.factor

index 084abb5084c07e6063b78237c6ebecf9474184de..27171b794b6dd033bacac068a5433fff6e0e7848 100644 (file)
--- a/basis/xmode/rules/rules.factor
+++ b/basis/xmode/rules/rules.factor
@@ -38,6 +38,7 @@ MEMO: standard-rule-set ( id -- ruleset )
      imports>> push ;
  
  : inverted-index ( hashes key index -- )
      imports>> push ;
  
  : inverted-index ( hashes key index -- )
+    [ [ { f } ] when-empty ] 2dip
      [ swapd push-at ] 2curry each ;
  
  : ?push-all ( seq1 seq2 -- seq1+seq2 )
      [ swapd push-at ] 2curry each ;
  
  : ?push-all ( seq1 seq2 -- seq1+seq2 )
@@ -60,13 +61,13 @@ C: <matcher> matcher
  TUPLE: rule
  no-line-break?
  no-word-break?
  TUPLE: rule
  no-line-break?
  no-word-break?
-no-escape?
  start
  end
  match-token
  body-token
  delegate
  chars
  start
  end
  match-token
  body-token
  delegate
  chars
+escape-rule
  ;
  
  TUPLE: seq-rule < rule ;
  ;
  
  TUPLE: seq-rule < rule ;
@@ -111,10 +112,6 @@ M: regexp text-hash-char drop f ;
      [ dup rule-chars* >upper swap ] dip rules>> inverted-index ;
  
  : add-escape-rule ( string ruleset -- )
      [ dup rule-chars* >upper swap ] dip rules>> inverted-index ;
  
  : add-escape-rule ( string ruleset -- )
-    over [
-        [ <escape-rule> ] dip
-        2dup escape-rule<<
-        add-rule
-    ] [
-        2drop
-    ] if ;
+    '[
+        <escape-rule> _ [ escape-rule<< ] [ add-rule ] 2bi
+    ] unless-empty ;
diff --git a/basis/xmode/utilities/utilities.factor b/basis/xmode/utilities/utilities.factor

index 9ded828f92a49f8984434c5195489a7d8eab0ce9..2d5e3c7f4041f632231082cd8765fe6ac1d7a2b4 100644 (file)
--- a/basis/xmode/utilities/utilities.factor
+++ b/basis/xmode/utilities/utilities.factor
@@ -1,5 +1,5 @@
  USING: combinators kernel namespaces quotations regexp sequences
  USING: combinators kernel namespaces quotations regexp sequences
-xml.data xml.traversal ;
+splitting xml.data xml.traversal ;
  IN: xmode.utilities
  
  : implies ( x y -- z ) [ not ] dip or ; inline
  IN: xmode.utilities
  
  : implies ( x y -- z ) [ not ] dip or ; inline
@@ -32,4 +32,6 @@ MACRO: (init-from-tag) ( specs -- quot )
      over [ (init-from-tag) ] dip ; inline
  
  : <?insensitive-regexp> ( string ? -- regexp )
      over [ (init-from-tag) ] dip ; inline
  
  : <?insensitive-regexp> ( string ? -- regexp )
+    ! handle Java style case-insensitive flags
+    "(?i)" pick subseq-start [ drop "(?i)" "" replace t ] when
      "i" "" ? <optioned-regexp> ;
      "i" "" ? <optioned-regexp> ;
author	John Benediktsson <mrjbq7@gmail.com>
	Fri, 5 Aug 2022 23:15:13 +0000 (16:15 -0700)
committer	John Benediktsson <mrjbq7@gmail.com>
	Fri, 5 Aug 2022 23:15:13 +0000 (16:15 -0700)
basis/xmode/loader/loader.factor		patch \| blob \| history
basis/xmode/loader/syntax/syntax.factor		patch \| blob \| history
basis/xmode/marker/marker-tests.factor		patch \| blob \| history
basis/xmode/marker/marker.factor		patch \| blob \| history
basis/xmode/rules/rules.factor		patch \| blob \| history
basis/xmode/utilities/utilities.factor		patch \| blob \| history