extra/xml/utilities/utilities.factor

   1 ! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
   2 ! See http://factorcode.org/license.txt for BSD license.\r
   3 USING: kernel namespaces sequences words io assocs\r
   4 quotations strings parser arrays xml.data xml.writer debugger\r
   5 splitting vectors ;\r
   6 IN: xml.utilities\r
   7 \r
   8 ! * System for words specialized on tag names\r
   9 \r
  10 TUPLE: process-missing process tag ;\r
  11 M: process-missing error.\r
  12     "Tag <" write\r
  13     dup process-missing-tag print-name\r
  14     "> not implemented on process process " write\r
  15     process-missing-process word-name print ;\r
  16 \r
  17 : run-process ( tag word -- )\r
  18     2dup "xtable" word-prop\r
  19     >r dup name-tag r> at* [ 2nip call ] [\r
  20         drop \ process-missing construct-boa throw\r
  21     ] if ;\r
  22 \r
  23 : PROCESS:\r
  24     CREATE\r
  25     dup H{ } clone "xtable" set-word-prop\r
  26     dup [ run-process ] curry define-compound ; parsing\r
  27 \r
  28 : TAG:\r
  29     scan scan-word\r
  30     parse-definition\r
  31     swap "xtable" word-prop\r
  32     rot "/" split [ >r 2dup r> swap set-at ] each 2drop ;\r
  33     parsing\r
  34 \r
  35 \r
  36 ! * Common utility functions\r
  37 \r
  38 : build-tag* ( items name -- tag )\r
  39     assure-name swap >r f r> <tag> ;\r
  40 \r
  41 : build-tag ( item name -- tag )\r
  42     >r 1array r> build-tag* ;\r
  43 \r
! Pushes a literal prolog tuple carrying "1.0" and "iso-8859-1".
! NOTE(review): presumably these are the XML version and encoding, and
! the final f the standalone flag -- confirm against the prolog tuple
! definition in xml.data.
: standard-prolog ( -- prolog )
    T{ prolog f "1.0" "iso-8859-1" f } ;
  46 \r
  47 : build-xml ( tag -- xml )\r
  48     standard-prolog { } rot { } <xml> ;\r
  49 \r
  50 : children>string ( tag -- string )\r
  51     tag-children\r
  52     dup [ string? ] all?\r
  53     [ "XML tag unexpectedly contains non-text children" throw ] unless\r
  54     concat ;\r
  55 \r
! Returns the direct children of tag that are themselves tags,
! dropping every other kind of child.
: children-tags ( tag -- sequence )
    tag-children [ tag? ] subset ;
  58 \r
! Returns the first direct child of tag that is itself a tag
! (the element result of find; the index is discarded).
: first-child-tag ( tag -- tag )
    tag-children [ tag? ] find nip ;
  61 \r
  62 ! * Utilities for searching through XML documents\r
  63 ! These all work from the outside in, top to bottom.\r
  64 \r
  65 : with-delegate ( object quot -- object )\r
  66     over clone >r >r delegate r> call r>\r
  67     [ set-delegate ] keep ; inline\r
  68 \r
  69 GENERIC# xml-each 1 ( quot tag -- ) inline\r
  70 M: tag xml-each\r
  71     [ call ] 2keep\r
  72     swap tag-children [ swap xml-each ] curry* each ;\r
  73 M: object xml-each\r
  74     call ;\r
  75 M: xml xml-each\r
  76     >r delegate r> xml-each ;\r
  77 \r
  78 GENERIC# xml-map 1 ( quot tag -- tag ) inline\r
  79 M: tag xml-map\r
  80     swap clone over >r swap call r> \r
  81     swap [ tag-children [ swap xml-map ] curry* map ] keep \r
  82     [ set-tag-children ] keep ;\r
  83 M: object xml-map\r
  84     call ;\r
  85 M: xml xml-map\r
  86     swap [ swap xml-map ] with-delegate ;\r
  87 \r
  88 : xml-subset ( quot tag -- seq ) ! quot: tag -- ?\r
  89     V{ } clone rot [\r
  90         swap >r [ swap call ] 2keep rot r>\r
  91         swap [ [ push ] keep ] [ nip ] if\r
  92     ] xml-each nip ;\r
  93 \r
  94 GENERIC# xml-find 1 ( quot tag -- tag ) inline\r
  95 M: tag xml-find\r
  96     [ call ] 2keep swap rot [\r
  97         f swap\r
  98         [ nip over >r swap xml-find r> swap dup ] find\r
  99         2drop ! leaves result of quot\r
 100     ] unless nip ;\r
 101 M: object xml-find\r
 102     keep f ? ;\r
 103 M: xml xml-find\r
 104     >r delegate r> xml-find ;\r
 105 \r
 106 GENERIC# xml-inject 1 ( quot tag -- ) inline\r
 107 M: tag xml-inject\r
 108     swap [\r
 109         swap [ call ] keep\r
 110         [ xml-inject ] keep\r
 111     ] change-each ;\r
 112 M: object xml-inject 2drop ;\r
 113 M: xml xml-inject >r delegate >r xml-inject ;\r
 114 \r
 115 ! * Accessing part of an XML document\r
! for tag- words, a star (*) means that it searches all descendants
! and no star searches only direct children
 118 \r
 119 : tag-named? ( name elem -- ? )\r
 120     dup tag? [ names-match? ] [ 2drop f ] if ;\r
 121 \r
 122 : tag-named* ( tag name/string -- matching-tag )\r
 123     assure-name swap [ dupd tag-named? ] xml-find nip ;\r
 124 \r
 125 : tags-named* ( tag name/string -- tags-seq )\r
 126     assure-name swap [ dupd tag-named? ] xml-subset nip ;\r
 127 \r
 128 : tag-named ( tag name/string -- matching-tag )\r
 129     ! like get-name-tag but only looks at direct children,\r
 130     ! not all the children down the tree.\r
 131     assure-name swap [ tag-named? ] curry* find nip ;\r
 132 \r
 133 : tags-named ( tag name/string -- tags-seq )\r
 134     assure-name swap [ tag-named? ] curry* subset ;\r
 135 \r
 136 : assert-tag ( name name -- )\r
 137     names-match? [ "Unexpected XML tag found" throw ] unless ;\r
 138 \r
 139 : insert-children ( children tag -- )\r
 140     dup tag-children [ push-all ]\r
 141     [ >r V{ } like r> set-tag-children ] if ;\r
 142 \r
 143 : insert-child ( child tag -- )\r
 144     >r 1vector r> insert-children ;\r
 145 \r
 146 : tag-with-attr? ( elem attr-value attr-name -- ? )\r
 147     rot dup tag? [ at = ] [ drop f ] if ;\r
 148 \r
 149 : tag-with-attr ( tag attr-value attr-name -- matching-tag )\r
 150     assure-name [ tag-with-attr? ] 2curry find nip ;\r
 151 \r
 152 : tags-with-attr ( tag attr-value attr-name -- tags-seq )\r
 153     assure-name [ tag-with-attr? ] 2curry subset ;\r
 154 \r
 155 : tag-with-attr* ( tag attr-value attr-name -- matching-tag )\r
 156     assure-name [ tag-with-attr? ] 2curry xml-find nip ;\r
 157 \r
 158 : tags-with-attr* ( tag attr-value attr-name -- tags-seq )\r
 159     assure-name [ tag-with-attr? ] 2curry xml-subset ;\r
 160 \r
 161 : get-id ( tag id -- elem ) ! elem=tag.getElementById(id)\r
 162     "id" tag-with-attr ;\r
 163 \r
 164 : tags-named-with-attr* ( tag tag-name attr-value attr-name -- tags )\r
 165     >r >r tags-named* r> r> tags-with-attr ;\r
 166 \r