]> gitweb.factorcode.org Git - factor.git/commitdiff
modern.html: yet another html parser
authorDoug Coleman <doug.coleman@gmail.com>
Sun, 11 Apr 2021 16:05:13 +0000 (11:05 -0500)
committerDoug Coleman <doug.coleman@gmail.com>
Sun, 11 Apr 2021 17:34:39 +0000 (12:34 -0500)
- does not handle typos
- not html5 compliant (that is a larger project, see html5 branch)
- meant to use with chloe templates when i figure out the template compiler works

extra/modern/html/authors.txt [new file with mode: 0644]
extra/modern/html/html.factor [new file with mode: 0644]

diff --git a/extra/modern/html/authors.txt b/extra/modern/html/authors.txt
new file mode 100644 (file)
index 0000000..7c1b2f2
--- /dev/null
@@ -0,0 +1 @@
+Doug Coleman
diff --git a/extra/modern/html/html.factor b/extra/modern/html/html.factor
new file mode 100644 (file)
index 0000000..48a2441
--- /dev/null
@@ -0,0 +1,159 @@
+! Copyright (C) 2021 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays combinators combinators.short-circuit
+generalizations kernel make math modern modern.slices multiline
+sequences sequences.extras strings unicode ;
+IN: modern.html
+
+TUPLE: tag name open-close-delimiter props children ;
+
+TUPLE: doctype open close values ;
+: <doctype> ( open close values -- doctype )
+    doctype new
+        swap >>values
+        swap >string >>close
+        swap >string >>open ;
+
+TUPLE: close-tag name ;
+: <close-tag> ( name -- tag )
+    close-tag new
+        swap >string rest rest but-last >>name ;
+
+TUPLE: open-tag < tag close-tag ;
+: <open-tag> ( name delimiter props -- tag )
+    open-tag new
+        swap >>props
+        swap >string drop ! >>open-close-delimiter
+        swap >string >>name
+        V{ } clone >>children ;
+
+TUPLE: self-close-tag < tag ;
+: <self-close-tag> ( name delimiter props -- tag )
+    self-close-tag new
+        swap >>props
+        swap >string drop ! >>open-close-delimiter
+        swap >string >>name
+        V{ } clone >>children ;
+
+: read-squote-string-payload ( n string -- n' string )
+    over [
+        { CHAR: \\ CHAR: ' } slice-til-separator-inclusive {
+            { f [ drop ] }
+            { CHAR: ' [ drop ] }
+            { CHAR: \\ [ drop next-char-from drop read-string-payload ] }
+        } case
+    ] [
+        string-expected-got-eof
+    ] if ;
+
+: read-dquote-string-payload ( n string -- n' string )
+    over [
+        { CHAR: \\ CHAR: \" } slice-til-separator-inclusive {
+            { f [ drop ] }
+            { CHAR: \" [ drop ] }
+            { CHAR: \\ [ drop next-char-from drop read-string-payload ] }
+        } case
+    ] [
+        string-expected-got-eof
+    ] if ;
+
+:: read-string ( n string char -- n' string payload )
+    n string char CHAR: ' = [ read-squote-string-payload ] [ read-dquote-string-payload ] if drop :> n'
+    n' string
+    n' [ n string string-expected-got-eof ] unless
+    n n' 1 - string <slice> ;
+
+: take-tag-name ( n string -- n' string tag )
+    [ "\s\r\n/>" member? ] slice-until ;
+
+: read-value ( n string -- n' string value )
+    skip-whitespace next-char-from {
+        { CHAR: ' [ CHAR: ' read-string ] }
+        { CHAR: " [ CHAR: " read-string ] }
+        { CHAR: [ [ "[" throw ] }
+        { CHAR: { [ "{" throw ] }
+        [ [ take-tag-name ] dip prefix ]
+    } case ;
+
+: read-prop ( n string -- n' string closing/f prop/f )
+    skip-whitespace "\s\n\r\"'<=/>" slice-til-either {
+        { CHAR: < [ "< error" throw ] }
+        { CHAR: = [ 1 split-slice-back drop [ read-value ] dip [ >string ] bi@ swap 2array f swap ] }
+        { CHAR: / [ ">" expect-and-span 2 split-slice-back swap >string f like ] }
+        { CHAR: > [ 1 split-slice-back swap >string f like ] }
+        { CHAR: " [ first read-string >string f swap ] }
+        { CHAR: ' [ first read-string >string f swap ] }
+        { CHAR: \s [ f swap >string ] }
+        { CHAR: \r [ f swap >string ] }
+        { CHAR: \n [ f swap >string ] }
+        { f [ "efff" throw ] }
+    } case ;
+
+: read-props ( props n string -- props n' string closing )
+    read-prop
+    [ 5 npick push ] when*
+    [ ] [ read-props ] if* ;
+
+: read-doctype ( n string opening -- n string doctype )
+    "!" expect-and-span "DOCTYPE" expect-and-span
+    [ V{ } clone -rot read-props ] dip
+    swap 5 nrot <doctype> ;
+
+: read-open-tag ( n string opening -- n' string tag )
+    [ take-tag-name ] dip drop ! B span-slices
+    [ V{ } clone -rot read-props ] dip
+    swap 5 nrot over ">" sequence= [
+        <open-tag>
+    ] [
+        <self-close-tag>
+    ] if ;
+
+: read-close-tag ( n string opening -- n' string tag )
+    "/" expect-and-span
+    [ take-tag-name ] dip span-slices
+    ">" expect-and-span
+    <close-tag> ;
+
+:: shorten* ( vector n -- seq )
+    vector n tail
+    n vector shorten ;
+
+: pop-til-end ( stack quot -- seq/f )
+    [ find-last drop ] keepd swap
+    [ shorten* ] [ drop f ] if* ; inline
+
+ERROR: unmatched-closing-tag-error stack tag ;
+:: find-last-open-tag ( stack name -- seq )
+    stack [ { [ tag? ] [ name>> name = ] } 1&& ] find-last drop [
+        stack swap shorten*
+    ] [
+        stack name unmatched-closing-tag-error
+    ] if* ;
+
+: lex-html ( stack n string -- stack n' string  )
+    skip-whitespace "<" slice-til-either {
+        { CHAR: < [
+            1 split-slice-back [ >string f like [ reach push ] when* ] dip
+            [ 2dup peek-from ] dip
+            swap {
+                { CHAR: / [
+                    read-close-tag reach over name>> find-last-open-tag unclip
+                    swap >>children
+                    swap >>close-tag
+                    ] }
+                { CHAR: ! [ read-doctype ] }
+                [ drop read-open-tag ]
+            } case
+        ] }
+        { f [ drop f ] }
+        [ drop >string ]
+    } case [ reach push lex-html ] when* ;
+
+ERROR: unmatched-open-tags stack seq ;
+: check-final-stack ( stack -- stack )
+    dup [
+        { [ open-tag? ] [ close-tag>> not ] } 1&&
+    ] filter [ unmatched-open-tags ] unless-empty ;
+
+: string>html ( string -- sequence )
+    [ V{ } clone 0 ] dip lex-html 2drop check-final-stack ;