From: John Benediktsson Date: Tue, 9 Aug 2022 21:44:15 +0000 (-0700) Subject: regexp: fix case-insensitive lookahead and lookbehind. X-Git-Tag: 0.99~1153 X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=commitdiff_plain;h=c0f0342d9ae9197401af909d86204c2d1f89559a regexp: fix case-insensitive lookahead and lookbehind. --- diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor index 6912e4d94c..044313f5e4 100644 --- a/basis/regexp/nfa/nfa.factor +++ b/basis/regexp/nfa/nfa.factor @@ -53,24 +53,12 @@ M:: star nfa-node ( node -- start end ) s1 s3 epsilon-transition s2 s3 ; -GENERIC: modify-epsilon ( tag -- newtag ) -! Potential off-by-one errors when lookaround nested in lookbehind - -M: object modify-epsilon ; - -: line-option ( multiline unix-lines default -- option ) - multiline option? [ - drop [ unix-lines option? ] 2dip swap ? - ] [ 2nip ] if ; +DEFER: modify-class -M: $crlf modify-epsilon - $unix end-of-input line-option ; - -M: ^crlf modify-epsilon - ^unix beginning-of-input line-option ; +! Potential off-by-one errors when lookaround nested in lookbehind M: tagged-epsilon nfa-node - clone [ modify-epsilon ] change-tag add-simple-entry ; + clone [ modify-class ] change-tag add-simple-entry ; M: concatenation nfa-node [ first>> ] [ second>> ] bi @@ -96,6 +84,31 @@ GENERIC: modify-class ( char-class -- char-class' ) M: object modify-class ; +M: concatenation modify-class + [ first>> ] [ second>> ] bi [ modify-class ] bi@ + concatenation boa ; + +M: alternation modify-class + [ first>> ] [ second>> ] bi [ modify-class ] bi@ + alternation boa ; + +M: lookahead modify-class + term>> modify-class lookahead boa ; + +M: lookbehind modify-class + term>> modify-class lookbehind boa ; + +: line-option ( multiline unix-lines default -- option ) + multiline option? [ + drop [ unix-lines option? ] 2dip swap ? + ] [ 2nip ] if ; + +M: $crlf modify-class + $unix end-of-input line-option ; + +M: ^crlf modify-class + ^unix beginning-of-input line-option ; + M: integer modify-class case-insensitive option? [ dup Letter? [ @@ -103,9 +116,6 @@ M: integer modify-class ] when ] when ; -M: integer nfa-node - modify-class add-simple-entry ; - M: primitive-class modify-class class>> modify-class ; diff --git a/basis/regexp/regexp-tests.factor b/basis/regexp/regexp-tests.factor index 36e877a03f..cdbe7742ef 100644 --- a/basis/regexp/regexp-tests.factor +++ b/basis/regexp/regexp-tests.factor @@ -513,6 +513,9 @@ unit-test { 3 } [ "caba" "(?<=b)a" first-match from>> ] unit-test +{ "<" } [ "