USING: sequences io.files io.encodings.ascii kernel values\r
splitting accessors math.parser ascii io assocs strings math\r
namespaces sorting combinators math.order arrays\r
-unicode.normalize ;\r
+unicode.normalize unicode.data combinators.lib locals ;\r
IN: unicode.collation\r
\r
VALUE: ducet\r
: derive-weight ( char -- weight )\r
! This should check Noncharacter_Code_Point\r
! If yes, then ignore the character\r
- ! otherwise, apply derivation formula\r
+ ! otherwise, apply derivation formula with the right base\r
drop { } ;\r
\r
-: string>weights ( string -- weights )\r
- ! This should actually look things up with\r
- ! multichar collation elements\r
- ! Also, do weight derivation for things not in DUCET\r
- [ dup 1string ducet at [ ] [ derive-weight ] ?if ]\r
+: last ( -- char )\r
+ building get empty? [ 0 ] [ building get peek peek ] if ;\r
+\r
+: blocked? ( char -- ? )\r
+ combining-class [\r
+ last combining-class =\r
+ ] [ last combining-class ] if* ;\r
+\r
+: possible-bases ( -- slice-of-building )\r
+ building get dup [ first combining-class not ] find-last\r
+ drop [ 0 ] unless* tail-slice ;\r
+\r
+:: ?combine ( char slice i -- ? )\r
+ [let | str [ i slice nth char suffix ] |\r
+ str ducet key? dup\r
+ [ str i slice set-nth ] when\r
+ ] ;\r
+\r
+: add ( char -- )\r
+ dup blocked? [ 1string , ] [\r
+ dup possible-bases dup length\r
+ [ ?combine ] 2with contains?\r
+ [ drop ] [ 1string , ] if\r
+ ] if ;\r
+\r
+: string>graphemes ( string -- graphemes )\r
+ [ [ add ] each ] { } make ;\r
+\r
+: graphemes>weights ( graphemes -- weights )\r
+ [ dup ducet at [ ] [ derive-weight ] ?if ]\r
{ } map-as concat ;\r
\r
: append-weights ( weights quot -- )\r
[ zero? ] tri@ and and ;\r
\r
: filter-ignorable ( weights -- weights' )\r
- ! Filters primary-ignorables which follow variable weighteds\r
- ! and all completely-ignorables\r
>r f r> [\r
tuck primary>> zero? and\r
[ swap ignorable?>> or ]\r
] filter nip ;\r
\r
: collation-key ( string -- key )\r
- nfd string>weights filter-ignorable weights>bytes ;\r
+ nfd string>graphemes graphemes>weights\r
+ filter-ignorable weights>bytes ;\r
\r
: compare-collation ( {str1,key} {str2,key} -- <=> )\r
2dup [ second ] bi@ <=> dup +eq+ =\r
\r
: string<=> ( str1 str2 -- <=> )\r
[ dup collation-key 2array ] bi@ compare-collation ;\r
+\r
+! Fix up table for long contractions\r
+: help-one ( assoc key -- )\r
+ ! Does this need to be more general?\r
+ 2 head 2dup swap key? [ 2drop ] [\r
+ [ [ 1string swap at ] with { } map-as concat ]\r
+ [ swap set-at ] 2bi\r
+ ] if ;\r
+\r
+: insert-helpers ( assoc -- )\r
+ dup keys [ length 3 >= ] filter\r
+ [ help-one ] with each ;\r
+\r
+ducet insert-helpers\r
! Copyright (C) 2006 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: http.client xml xml.utilities kernel sequences
-namespaces http math.parser help math.order ;
+namespaces http math.parser help math.order locals ;
IN: yahoo
TUPLE: result title url summary ;
] map ;
: yahoo-url ( -- str )
- "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=" ;
+ "http://search.yahooapis.com/WebSearchService/V1/webSearch" ;
-: query ( search num -- url )
+:: query ( search num appid -- url )
[
yahoo-url %
- swap url-encode %
- "&results=" % #
+ "?appid=" % appid %
+ "&query=" % search url-encode %
+ "&results=" % num #
] "" make ;
-: search-yahoo ( search num -- seq )
+: factor-id
+ "fRrVAKzV34GDyeRw6bUHDhEWHRedwfOC7e61wwXZLgGF80E67spxdQXuugBe2pgIevMmKwA-" ;
+
+: search-yahoo/id ( search num id -- seq )
query http-get string>xml parse-yahoo ;
+
+: search-yahoo ( search num -- seq )
+ factor-id search-yahoo/id ;