basis/io/encodings/8-bit/8-bit-docs.factor

   1 ! Copyright (C) 2008 Daniel Ehrenberg
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: help.syntax help.markup ;
   4 IN: io.encodings.8-bit
   5
   6 HELP: ebcdic ! Help entry for the ebcdic word; the text identifies the bundled variant as code page 37.
   7 { $var-description "EBCDIC is an 8-bit legacy encoding designed for IBM mainframes like System/360 in the 1960s. It has since fallen into disuse. It contains large unallocated regions, and the version included here (code page 37) contains auxiliary characters in this region for English- and Portuguese-speaking countries." }
   8 { $see-also "encodings-introduction" } ;
   9
  10 HELP: koi8-r ! Help entry for the koi8-r word: a variable description plus a see-also link.
  11 { $var-description "KOI8-R is an 8-bit superset of ASCII which encodes the Cyrillic alphabet, as used in Russian and Bulgarian. Characters are in such an order that, if the eighth bit is stripped, text is still interpretable as ASCII. Block-building characters also exist." }
  12 { $see-also "encodings-introduction" } ;
  13
  14 HELP: latin/arabic ! Help entry for the latin/arabic word (described below as ISO-8859-6).
  15 { $var-description "This is the ISO-8859-6 encoding, also called Latin/Arabic. It is an 8-bit superset of ASCII and provides the characters necessary for Arabic, though not other languages which use Arabic script." }
  16 { $see-also "encodings-introduction" } ;
  17
  18 HELP: latin/cyrillic ! Help entry for the latin/cyrillic word (described below as ISO-8859-5).
  19 { $var-description "This is the ISO-8859-5 encoding, also called Latin/Cyrillic. It is an 8-bit superset of ASCII and provides the characters necessary for most languages which use Cyrillic, including Russian, Macedonian, Belarusian, Bulgarian, Serbian, and Ukrainian. KOI8-R is used much more commonly." }
  20 { $see-also "encodings-introduction" } ;
  21
  22 HELP: latin/greek ! Help entry for the latin/greek word (described below as ISO-8859-7).
  23 { $var-description "This is the ISO-8859-7 encoding, also called Latin/Greek. It is an 8-bit superset of ASCII and provides the characters necessary for Greek written in modern monotonic orthography, or ancient Greek without accent marks." }
  24 { $see-also "encodings-introduction" } ;
  25
  26 HELP: latin/hebrew ! Help entry for the latin/hebrew word (described below as ISO-8859-8).
  27 { $var-description "This is the ISO-8859-8 encoding, also called Latin/Hebrew. It is an 8-bit superset of ASCII and provides the characters necessary for modern Hebrew without explicit vowels. Generally, this is interpreted in logical order, making it ISO-8859-8-I, technically." }
  28 { $see-also "encodings-introduction" } ;
  29
  30 HELP: latin/thai ! Help entry for the latin/thai word (described below as ISO-8859-11).
  31 { $var-description "This is the ISO-8859-11 encoding, also called Latin/Thai. It is an 8-bit superset of ASCII containing the characters necessary to represent Thai. It is basically identical to TIS-620." }
  32 { $see-also "encodings-introduction" } ;
  33
  34 HELP: latin2 ! Help entry for the latin2 word (described below as ISO-8859-2).
  35 { $var-description "This is the ISO-8859-2 encoding, also called Latin-2: Eastern European. It is an 8-bit superset of ASCII and provides the characters necessary for most eastern European languages." }
  36 { $see-also "encodings-introduction" } ;
  37
  38 HELP: latin3 ! Help entry for the latin3 word (described below as ISO-8859-3).
  39 { $var-description "This is the ISO-8859-3 encoding, also called Latin-3: South European. It is an 8-bit superset of ASCII and provides the characters necessary for Turkish, Maltese and Esperanto." }
  40 { $see-also "encodings-introduction" } ;
  41
  42 HELP: latin4 ! Help entry for the latin4 word (described below as ISO-8859-4).
  43 { $var-description "This is the ISO-8859-4 encoding, also called Latin-4: North European. It is an 8-bit superset of ASCII and provides the characters necessary for Latvian, Lithuanian, Estonian, Greenlandic and Sami." }
  44 { $see-also "encodings-introduction" } ;
  45
  46 HELP: latin5 ! Help entry for the latin5 word (described below as ISO-8859-9).
  47 { $var-description "This is the ISO-8859-9 encoding, also called Latin-5: Turkish. It is an 8-bit superset of ASCII and provides the characters necessary for Turkish, similar to Latin-1 but replacing the spots used for Icelandic with characters used in Turkish." }
  48 { $see-also "encodings-introduction" } ;
  49
  50 HELP: latin6 ! Help entry for the latin6 word (described below as ISO-8859-10).
  51 { $var-description "This is the ISO-8859-10 encoding, also called Latin-6: Nordic. It is an 8-bit superset of ASCII containing the same characters as Latin-4, but rearranged to be of better use to Nordic languages." }
  52 { $see-also "encodings-introduction" } ;
  53
  54 HELP: latin7 ! Help entry for the latin7 word (described below as ISO-8859-13).
  55 { $var-description "This is the ISO-8859-13 encoding, also called Latin-7: Baltic Rim. It is an 8-bit superset of ASCII containing all characters necessary to represent Baltic Rim languages, as previous character sets were incomplete." }
  56 { $see-also "encodings-introduction" } ;
  57
  58 HELP: latin8 ! Help entry for the latin8 word (described below as ISO-8859-14).
  59 { $var-description "This is the ISO-8859-14 encoding, also called Latin-8: Celtic. It is an 8-bit superset of ASCII designed for Celtic languages like Gaelic and Breton." }
  60 { $see-also "encodings-introduction" } ;
  61
  62 HELP: latin9 ! Help entry for the latin9 word (described below as ISO-8859-15).
  63 { $var-description "This is the ISO-8859-15 encoding, also called Latin-9 and unofficially known as Latin-0. It is an 8-bit superset of ASCII designed as a modification of Latin-1, removing little-used characters in favor of the Euro symbol and other characters." }
  64 { $see-also "encodings-introduction" } ;
  65
  66 HELP: latin10 ! Help entry for the latin10 word (described below as ISO-8859-16).
  67 { $var-description "This is the ISO-8859-16 encoding, also called Latin-10: South-Eastern European. It is an 8-bit superset of ASCII." }
  68 { $see-also "encodings-introduction" } ;
  69
  70 HELP: cp437 ! Help entry for the cp437 word (described below as IBM437).
  71 { $var-description "This is the IBM437 encoding, also called CP437. It is an 8-bit superset of ASCII and provides the original DOS character set with the box-drawing characters used to draw windows and frames on the text terminals back in the day." }
  72 { $see-also "encodings-introduction" } ;
  73
  74 HELP: mac-roman ! Help entry for the mac-roman word: a variable description plus a see-also link.
  75 { $var-description "Mac Roman is an 8-bit superset of ASCII which was the standard encoding on Mac OS prior to version 10. Apart from the ASCII range and a few other places, it is incompatible with Latin-1. It is suitable for encoding many Western European languages." }
  76 { $see-also "encodings-introduction" } ;
  77
  78 HELP: windows-1252 ! Help entry for the windows-1252 word: a variable description plus a see-also link.
  79 { $var-description "Windows 1252 is an 8-bit superset of ASCII which is closely related to Latin-1. Control characters in the 0x80 to 0x9F range are replaced with printable characters such as the Euro symbol." }
  80 { $see-also "encodings-introduction" } ;
  81
  82 ARTICLE: "io.encodings.8-bit" "Legacy 8-bit encodings" ! Overview article: article name first, then its title; the body is one paragraph with a vocabulary link.
  83 "Many encodings are a simple mapping of bytes onto characters. The " { $vocab-link "io.encodings.8-bit" } " vocabulary implements these generically using existing resource files. These encodings should be used with extreme caution, as fully general Unicode encodings like UTF-8 are nearly always more appropriate." ;
  84
  85 ABOUT: "io.encodings.8-bit" ! Refers to the "io.encodings.8-bit" article by name; presumably sets it as the main help page of the current vocabulary (confirm against the help system docs).