--- /dev/null
+USING: kernel regexp tools.test ;
+IN: verbal-expressions
+
+{ f t } [
+ <verbexp> something >regexp
+ [ "" swap matches? ]
+ [ "a" swap matches? ] bi
+] unit-test
+
+{ t } [
+ "what" <verbexp> anything >regexp matches?
+] unit-test
+
+{ f t } [
+ <verbexp> start-of-line "w" anything-but >regexp
+ [ "what" swap matches? ]
+ [ "time" swap matches? ] bi
+] unit-test
+
+{ f t f } [
+ <verbexp> "a" something-but >regexp
+ [ "" swap matches? ]
+ [ "b" swap matches? ]
+ [ "a" swap matches? ] tri
+] unit-test
+
+{ t f } [
+ <verbexp> start-of-line "a" then >regexp
+ [ "a" swap matches? ]
+ [ "ba" swap matches? ] bi
+] unit-test
+
+{ t f } [
+ <verbexp> "a" then end-of-line >regexp
+ [ "a" swap matches? ]
+ [ "ab" swap matches? ] bi
+] unit-test
+
+{ t t } [
+ <verbexp> start-of-line "a" then "b" maybe >regexp
+ [ "acb" swap re-contains? ]
+ [ "abc" swap re-contains? ] bi
+] unit-test
+
+{ t f } [
+ <verbexp> start-of-line "a" then "xyz" any-of >regexp
+ [ "ay" swap matches? ]
+ [ "abc" swap matches? ] bi
+] unit-test
+
+{ t f } [
+ <verbexp> start-of-line "abc" then -or- "def" then >regexp
+ [ "defzz" swap re-contains? ]
+ [ "xyzabc" swap re-contains? ] bi
+] unit-test
+
+{ t t f } [
+ <verbexp> start-of-line "abc" then line-break "def" then >regexp
+ [ "abc\r\ndef" swap matches? ]
+ [ "abc\ndef" swap matches? ]
+ [ "abc\r\n def" swap matches? ] tri
+] unit-test
+
+{ t f } [
+ <verbexp> start-of-line tab "abc" then >regexp
+ [ "\tabc" swap matches? ]
+ [ "abc" swap matches? ] bi
+] unit-test
+
+{ f } [ "A" <verbexp> start-of-line "a" then >regexp matches? ] unit-test
+{ t t } [
+ <verbexp> start-of-line "a" then case-insensitive >regexp
+ [ "A" swap matches? ]
+ [ "a" swap matches? ] bi
+] unit-test
+
+! TODO: single-line
+
+{ t } [
+ "https://www.google.com"
+ <verbexp>
+ start-of-line
+ "http" then
+ "s" maybe
+ "://" then
+ "www." maybe
+ " " anything-but
+ end-of-line
+ >regexp matches?
+] unit-test
+
--- /dev/null
+! Copyright (C) 2013 John Benediktsson
+! See http://factorcode.org/license.txt for BSD license
+
+USING: accessors ascii assocs combinators.short-circuit fry
+grouping kernel make regexp sequences ;
+
+IN: verbal-expressions
+
+TUPLE: verbal-expression prefix source suffix modifiers ;
+
+: <verbal-expressions> ( -- verbexp )
+ "" "" "" "" verbal-expression boa ; inline
+
+ALIAS: <verbexp> <verbal-expressions>
+
+: >regexp ( verbexp -- regexp )
+ [ [ prefix>> ] [ source>> ] [ suffix>> ] tri 3append ]
+ [ modifiers>> ] bi <optioned-regexp> ; inline
+
+: build-regexp ( ... quot: ( ... verbexp -- ... verbexp ) -- ... regexp )
+ '[ <verbexp> @ >regexp ] call ; inline
+
+<PRIVATE
+
+: add ( verbexp str -- verbexp )
+ '[ _ append ] change-source ;
+
+: add-modifier ( verbexp ch -- verbexp )
+ '[ _ suffix ] change-modifiers ;
+
+: remove-modifier ( verbexp ch -- verbexp )
+ '[ _ swap remove ] change-modifiers ;
+
+: re-escape ( str -- str' )
+ [
+ [
+ dup { [ Letter? ] [ digit? ] } 1||
+ [ CHAR: \ , ] unless ,
+ ] each
+ ] "" make ;
+
+PRIVATE>
+
+: anything ( verbexp -- verbexp )
+ "(?:.*)" add ;
+
+: anything-but ( verbexp value -- verbexp )
+ re-escape "(?:[^" "]*)" surround add ;
+
+: something ( verbexp -- verbexp )
+ "(?:.+)" add ;
+
+: something-but ( verbexp value -- verbexp )
+ re-escape "(?:[^" "]+)" surround add ;
+
+: end-of-line ( verbexp -- verbexp )
+ [ "$" append ] change-suffix ;
+
+: maybe ( verbexp value -- verbexp )
+ re-escape "(?:" ")?" surround add ;
+
+: start-of-line ( verbexp -- verbexp )
+ [ "^" append ] change-prefix ;
+
+: -find- ( verbexp value -- verbexp )
+ re-escape "(" ")" surround add ;
+
+: then ( verbexp value -- verbexp )
+ re-escape "(?:" ")" surround add ;
+
+: any-of ( verbexp value -- verbexp )
+ re-escape "(?:[" "])" surround add ;
+
+: line-break ( verbexp -- verbexp )
+ "(?:(?:\\n)|(?:\\r\\n))" add ;
+
+ALIAS: br line-break
+
+: range ( verbexp alist -- verbexp )
+ [ [ re-escape ] bi@ "-" glue ] { } assoc>map concat
+ "([" "])" surround add ;
+
+: tab ( verbexp -- verbexp ) "\\t" add ;
+
+: word ( verbexp -- verbexp ) "\\w+" add ;
+
+: space ( verbexp -- verbexp ) "\\s" add ;
+
+: many ( verbexp -- verbexp )
+ [
+ dup ?last "*+" member? [ "+" append ] unless
+ ] change-source ;
+
+: -or- ( verbexp -- verbexp )
+ [ "(" append ] change-prefix
+ [ ")|(" append ] change-source
+ [ ")" prepend ] change-suffix ;
+
+: case-insensitive ( verbexp -- verbexp )
+ CHAR: i add-modifier ;
+
+: case-sensitive ( verbexp -- verbexp )
+ CHAR: i remove-modifier ;
+
+: multiline ( verbexp -- verbexp )
+ CHAR: m add-modifier ;
+
+: singleline ( verbexp -- verbexp )
+ CHAR: m remove-modifier ;
+
+ALIAS: ^( start-of-line
+ALIAS: () then
+ALIAS: ()? maybe
+ALIAS: [] range
+ALIAS: ()* anything
+ALIAS: ^]* anything-but
+ALIAS: ()+ something
+ALIAS: [^]+ something-but
+ALIAS: )|( -or-
+ALIAS: )$ end-of-line