]> git.mdlowis.com Git - archive/carl.git/commitdiff
Updated unicode generator to generate all tables instead of just the explicitly named...
author Mike D. Lowis <mike.lowis@gentex.com>
Fri, 18 Sep 2015 15:56:20 +0000 (11:56 -0400)
committer Mike D. Lowis <mike.lowis@gentex.com>
Fri, 18 Sep 2015 15:56:20 +0000 (11:56 -0400)
source/utf/alphas.c [new file with mode: 0644]
source/utf/digits.c [new file with mode: 0644]
source/utf/other.c [new file with mode: 0644]
source/utf/runetype.c
tools/unicode.rb

diff --git a/source/utf/alphas.c b/source/utf/alphas.c
new file mode 100644 (file)
index 0000000..f7d44d4
--- /dev/null
@@ -0,0 +1,570 @@
+#include <libc.h>
+
+static Rune alphas[561][2] = {
+    { 0x41, 0x5a },
+    { 0x61, 0x7a },
+    { 0xaa, 0xaa },
+    { 0xb5, 0xb5 },
+    { 0xba, 0xba },
+    { 0xc0, 0xd6 },
+    { 0xd8, 0xf6 },
+    { 0xf8, 0x2c1 },
+    { 0x2c6, 0x2d1 },
+    { 0x2e0, 0x2e4 },
+    { 0x2ec, 0x2ec },
+    { 0x2ee, 0x2ee },
+    { 0x370, 0x374 },
+    { 0x376, 0x377 },
+    { 0x37a, 0x37d },
+    { 0x37f, 0x37f },
+    { 0x386, 0x386 },
+    { 0x388, 0x38a },
+    { 0x38c, 0x38c },
+    { 0x38e, 0x3a1 },
+    { 0x3a3, 0x3f5 },
+    { 0x3f7, 0x481 },
+    { 0x48a, 0x52f },
+    { 0x531, 0x556 },
+    { 0x559, 0x559 },
+    { 0x561, 0x587 },
+    { 0x5d0, 0x5ea },
+    { 0x5f0, 0x5f2 },
+    { 0x620, 0x64a },
+    { 0x66e, 0x66f },
+    { 0x671, 0x6d3 },
+    { 0x6d5, 0x6d5 },
+    { 0x6e5, 0x6e6 },
+    { 0x6ee, 0x6ef },
+    { 0x6fa, 0x6fc },
+    { 0x6ff, 0x6ff },
+    { 0x710, 0x710 },
+    { 0x712, 0x72f },
+    { 0x74d, 0x7a5 },
+    { 0x7b1, 0x7b1 },
+    { 0x7ca, 0x7ea },
+    { 0x7f4, 0x7f5 },
+    { 0x7fa, 0x7fa },
+    { 0x800, 0x815 },
+    { 0x81a, 0x81a },
+    { 0x824, 0x824 },
+    { 0x828, 0x828 },
+    { 0x840, 0x858 },
+    { 0x8a0, 0x8b4 },
+    { 0x904, 0x939 },
+    { 0x93d, 0x93d },
+    { 0x950, 0x950 },
+    { 0x958, 0x961 },
+    { 0x971, 0x980 },
+    { 0x985, 0x98c },
+    { 0x98f, 0x990 },
+    { 0x993, 0x9a8 },
+    { 0x9aa, 0x9b0 },
+    { 0x9b2, 0x9b2 },
+    { 0x9b6, 0x9b9 },
+    { 0x9bd, 0x9bd },
+    { 0x9ce, 0x9ce },
+    { 0x9dc, 0x9dd },
+    { 0x9df, 0x9e1 },
+    { 0x9f0, 0x9f1 },
+    { 0xa05, 0xa0a },
+    { 0xa0f, 0xa10 },
+    { 0xa13, 0xa28 },
+    { 0xa2a, 0xa30 },
+    { 0xa32, 0xa33 },
+    { 0xa35, 0xa36 },
+    { 0xa38, 0xa39 },
+    { 0xa59, 0xa5c },
+    { 0xa5e, 0xa5e },
+    { 0xa72, 0xa74 },
+    { 0xa85, 0xa8d },
+    { 0xa8f, 0xa91 },
+    { 0xa93, 0xaa8 },
+    { 0xaaa, 0xab0 },
+    { 0xab2, 0xab3 },
+    { 0xab5, 0xab9 },
+    { 0xabd, 0xabd },
+    { 0xad0, 0xad0 },
+    { 0xae0, 0xae1 },
+    { 0xaf9, 0xaf9 },
+    { 0xb05, 0xb0c },
+    { 0xb0f, 0xb10 },
+    { 0xb13, 0xb28 },
+    { 0xb2a, 0xb30 },
+    { 0xb32, 0xb33 },
+    { 0xb35, 0xb39 },
+    { 0xb3d, 0xb3d },
+    { 0xb5c, 0xb5d },
+    { 0xb5f, 0xb61 },
+    { 0xb71, 0xb71 },
+    { 0xb83, 0xb83 },
+    { 0xb85, 0xb8a },
+    { 0xb8e, 0xb90 },
+    { 0xb92, 0xb95 },
+    { 0xb99, 0xb9a },
+    { 0xb9c, 0xb9c },
+    { 0xb9e, 0xb9f },
+    { 0xba3, 0xba4 },
+    { 0xba8, 0xbaa },
+    { 0xbae, 0xbb9 },
+    { 0xbd0, 0xbd0 },
+    { 0xc05, 0xc0c },
+    { 0xc0e, 0xc10 },
+    { 0xc12, 0xc28 },
+    { 0xc2a, 0xc39 },
+    { 0xc3d, 0xc3d },
+    { 0xc58, 0xc5a },
+    { 0xc60, 0xc61 },
+    { 0xc85, 0xc8c },
+    { 0xc8e, 0xc90 },
+    { 0xc92, 0xca8 },
+    { 0xcaa, 0xcb3 },
+    { 0xcb5, 0xcb9 },
+    { 0xcbd, 0xcbd },
+    { 0xcde, 0xcde },
+    { 0xce0, 0xce1 },
+    { 0xcf1, 0xcf2 },
+    { 0xd05, 0xd0c },
+    { 0xd0e, 0xd10 },
+    { 0xd12, 0xd3a },
+    { 0xd3d, 0xd3d },
+    { 0xd4e, 0xd4e },
+    { 0xd5f, 0xd61 },
+    { 0xd7a, 0xd7f },
+    { 0xd85, 0xd96 },
+    { 0xd9a, 0xdb1 },
+    { 0xdb3, 0xdbb },
+    { 0xdbd, 0xdbd },
+    { 0xdc0, 0xdc6 },
+    { 0xe01, 0xe30 },
+    { 0xe32, 0xe33 },
+    { 0xe40, 0xe46 },
+    { 0xe81, 0xe82 },
+    { 0xe84, 0xe84 },
+    { 0xe87, 0xe88 },
+    { 0xe8a, 0xe8a },
+    { 0xe8d, 0xe8d },
+    { 0xe94, 0xe97 },
+    { 0xe99, 0xe9f },
+    { 0xea1, 0xea3 },
+    { 0xea5, 0xea5 },
+    { 0xea7, 0xea7 },
+    { 0xeaa, 0xeab },
+    { 0xead, 0xeb0 },
+    { 0xeb2, 0xeb3 },
+    { 0xebd, 0xebd },
+    { 0xec0, 0xec4 },
+    { 0xec6, 0xec6 },
+    { 0xedc, 0xedf },
+    { 0xf00, 0xf00 },
+    { 0xf40, 0xf47 },
+    { 0xf49, 0xf6c },
+    { 0xf88, 0xf8c },
+    { 0x1000, 0x102a },
+    { 0x103f, 0x103f },
+    { 0x1050, 0x1055 },
+    { 0x105a, 0x105d },
+    { 0x1061, 0x1061 },
+    { 0x1065, 0x1066 },
+    { 0x106e, 0x1070 },
+    { 0x1075, 0x1081 },
+    { 0x108e, 0x108e },
+    { 0x10a0, 0x10c5 },
+    { 0x10c7, 0x10c7 },
+    { 0x10cd, 0x10cd },
+    { 0x10d0, 0x10fa },
+    { 0x10fc, 0x1248 },
+    { 0x124a, 0x124d },
+    { 0x1250, 0x1256 },
+    { 0x1258, 0x1258 },
+    { 0x125a, 0x125d },
+    { 0x1260, 0x1288 },
+    { 0x128a, 0x128d },
+    { 0x1290, 0x12b0 },
+    { 0x12b2, 0x12b5 },
+    { 0x12b8, 0x12be },
+    { 0x12c0, 0x12c0 },
+    { 0x12c2, 0x12c5 },
+    { 0x12c8, 0x12d6 },
+    { 0x12d8, 0x1310 },
+    { 0x1312, 0x1315 },
+    { 0x1318, 0x135a },
+    { 0x1380, 0x138f },
+    { 0x13a0, 0x13f5 },
+    { 0x13f8, 0x13fd },
+    { 0x1401, 0x166c },
+    { 0x166f, 0x167f },
+    { 0x1681, 0x169a },
+    { 0x16a0, 0x16ea },
+    { 0x16f1, 0x16f8 },
+    { 0x1700, 0x170c },
+    { 0x170e, 0x1711 },
+    { 0x1720, 0x1731 },
+    { 0x1740, 0x1751 },
+    { 0x1760, 0x176c },
+    { 0x176e, 0x1770 },
+    { 0x1780, 0x17b3 },
+    { 0x17d7, 0x17d7 },
+    { 0x17dc, 0x17dc },
+    { 0x1820, 0x1877 },
+    { 0x1880, 0x18a8 },
+    { 0x18aa, 0x18aa },
+    { 0x18b0, 0x18f5 },
+    { 0x1900, 0x191e },
+    { 0x1950, 0x196d },
+    { 0x1970, 0x1974 },
+    { 0x1980, 0x19ab },
+    { 0x19b0, 0x19c9 },
+    { 0x1a00, 0x1a16 },
+    { 0x1a20, 0x1a54 },
+    { 0x1aa7, 0x1aa7 },
+    { 0x1b05, 0x1b33 },
+    { 0x1b45, 0x1b4b },
+    { 0x1b83, 0x1ba0 },
+    { 0x1bae, 0x1baf },
+    { 0x1bba, 0x1be5 },
+    { 0x1c00, 0x1c23 },
+    { 0x1c4d, 0x1c4f },
+    { 0x1c5a, 0x1c7d },
+    { 0x1ce9, 0x1cec },
+    { 0x1cee, 0x1cf1 },
+    { 0x1cf5, 0x1cf6 },
+    { 0x1d00, 0x1dbf },
+    { 0x1e00, 0x1f15 },
+    { 0x1f18, 0x1f1d },
+    { 0x1f20, 0x1f45 },
+    { 0x1f48, 0x1f4d },
+    { 0x1f50, 0x1f57 },
+    { 0x1f59, 0x1f59 },
+    { 0x1f5b, 0x1f5b },
+    { 0x1f5d, 0x1f5d },
+    { 0x1f5f, 0x1f7d },
+    { 0x1f80, 0x1fb4 },
+    { 0x1fb6, 0x1fbc },
+    { 0x1fbe, 0x1fbe },
+    { 0x1fc2, 0x1fc4 },
+    { 0x1fc6, 0x1fcc },
+    { 0x1fd0, 0x1fd3 },
+    { 0x1fd6, 0x1fdb },
+    { 0x1fe0, 0x1fec },
+    { 0x1ff2, 0x1ff4 },
+    { 0x1ff6, 0x1ffc },
+    { 0x2071, 0x2071 },
+    { 0x207f, 0x207f },
+    { 0x2090, 0x209c },
+    { 0x2102, 0x2102 },
+    { 0x2107, 0x2107 },
+    { 0x210a, 0x2113 },
+    { 0x2115, 0x2115 },
+    { 0x2119, 0x211d },
+    { 0x2124, 0x2124 },
+    { 0x2126, 0x2126 },
+    { 0x2128, 0x2128 },
+    { 0x212a, 0x212d },
+    { 0x212f, 0x2139 },
+    { 0x213c, 0x213f },
+    { 0x2145, 0x2149 },
+    { 0x214e, 0x214e },
+    { 0x2183, 0x2184 },
+    { 0x2c00, 0x2c2e },
+    { 0x2c30, 0x2c5e },
+    { 0x2c60, 0x2ce4 },
+    { 0x2ceb, 0x2cee },
+    { 0x2cf2, 0x2cf3 },
+    { 0x2d00, 0x2d25 },
+    { 0x2d27, 0x2d27 },
+    { 0x2d2d, 0x2d2d },
+    { 0x2d30, 0x2d67 },
+    { 0x2d6f, 0x2d6f },
+    { 0x2d80, 0x2d96 },
+    { 0x2da0, 0x2da6 },
+    { 0x2da8, 0x2dae },
+    { 0x2db0, 0x2db6 },
+    { 0x2db8, 0x2dbe },
+    { 0x2dc0, 0x2dc6 },
+    { 0x2dc8, 0x2dce },
+    { 0x2dd0, 0x2dd6 },
+    { 0x2dd8, 0x2dde },
+    { 0x2e2f, 0x2e2f },
+    { 0x3005, 0x3006 },
+    { 0x3031, 0x3035 },
+    { 0x303b, 0x303c },
+    { 0x3041, 0x3096 },
+    { 0x309d, 0x309f },
+    { 0x30a1, 0x30fa },
+    { 0x30fc, 0x30ff },
+    { 0x3105, 0x312d },
+    { 0x3131, 0x318e },
+    { 0x31a0, 0x31ba },
+    { 0x31f0, 0x31ff },
+    { 0x3400, 0x3400 },
+    { 0x4db5, 0x4db5 },
+    { 0x4e00, 0x4e00 },
+    { 0x9fd5, 0x9fd5 },
+    { 0xa000, 0xa48c },
+    { 0xa4d0, 0xa4fd },
+    { 0xa500, 0xa60c },
+    { 0xa610, 0xa61f },
+    { 0xa62a, 0xa62b },
+    { 0xa640, 0xa66e },
+    { 0xa67f, 0xa69d },
+    { 0xa6a0, 0xa6e5 },
+    { 0xa717, 0xa71f },
+    { 0xa722, 0xa788 },
+    { 0xa78b, 0xa7ad },
+    { 0xa7b0, 0xa7b7 },
+    { 0xa7f7, 0xa801 },
+    { 0xa803, 0xa805 },
+    { 0xa807, 0xa80a },
+    { 0xa80c, 0xa822 },
+    { 0xa840, 0xa873 },
+    { 0xa882, 0xa8b3 },
+    { 0xa8f2, 0xa8f7 },
+    { 0xa8fb, 0xa8fb },
+    { 0xa8fd, 0xa8fd },
+    { 0xa90a, 0xa925 },
+    { 0xa930, 0xa946 },
+    { 0xa960, 0xa97c },
+    { 0xa984, 0xa9b2 },
+    { 0xa9cf, 0xa9cf },
+    { 0xa9e0, 0xa9e4 },
+    { 0xa9e6, 0xa9ef },
+    { 0xa9fa, 0xa9fe },
+    { 0xaa00, 0xaa28 },
+    { 0xaa40, 0xaa42 },
+    { 0xaa44, 0xaa4b },
+    { 0xaa60, 0xaa76 },
+    { 0xaa7a, 0xaa7a },
+    { 0xaa7e, 0xaaaf },
+    { 0xaab1, 0xaab1 },
+    { 0xaab5, 0xaab6 },
+    { 0xaab9, 0xaabd },
+    { 0xaac0, 0xaac0 },
+    { 0xaac2, 0xaac2 },
+    { 0xaadb, 0xaadd },
+    { 0xaae0, 0xaaea },
+    { 0xaaf2, 0xaaf4 },
+    { 0xab01, 0xab06 },
+    { 0xab09, 0xab0e },
+    { 0xab11, 0xab16 },
+    { 0xab20, 0xab26 },
+    { 0xab28, 0xab2e },
+    { 0xab30, 0xab5a },
+    { 0xab5c, 0xab65 },
+    { 0xab70, 0xabe2 },
+    { 0xac00, 0xac00 },
+    { 0xd7a3, 0xd7a3 },
+    { 0xd7b0, 0xd7c6 },
+    { 0xd7cb, 0xd7fb },
+    { 0xf900, 0xfa6d },
+    { 0xfa70, 0xfad9 },
+    { 0xfb00, 0xfb06 },
+    { 0xfb13, 0xfb17 },
+    { 0xfb1d, 0xfb1d },
+    { 0xfb1f, 0xfb28 },
+    { 0xfb2a, 0xfb36 },
+    { 0xfb38, 0xfb3c },
+    { 0xfb3e, 0xfb3e },
+    { 0xfb40, 0xfb41 },
+    { 0xfb43, 0xfb44 },
+    { 0xfb46, 0xfbb1 },
+    { 0xfbd3, 0xfd3d },
+    { 0xfd50, 0xfd8f },
+    { 0xfd92, 0xfdc7 },
+    { 0xfdf0, 0xfdfb },
+    { 0xfe70, 0xfe74 },
+    { 0xfe76, 0xfefc },
+    { 0xff21, 0xff3a },
+    { 0xff41, 0xff5a },
+    { 0xff66, 0xffbe },
+    { 0xffc2, 0xffc7 },
+    { 0xffca, 0xffcf },
+    { 0xffd2, 0xffd7 },
+    { 0xffda, 0xffdc },
+    { 0x10000, 0x1000b },
+    { 0x1000d, 0x10026 },
+    { 0x10028, 0x1003a },
+    { 0x1003c, 0x1003d },
+    { 0x1003f, 0x1004d },
+    { 0x10050, 0x1005d },
+    { 0x10080, 0x100fa },
+    { 0x10280, 0x1029c },
+    { 0x102a0, 0x102d0 },
+    { 0x10300, 0x1031f },
+    { 0x10330, 0x10340 },
+    { 0x10342, 0x10349 },
+    { 0x10350, 0x10375 },
+    { 0x10380, 0x1039d },
+    { 0x103a0, 0x103c3 },
+    { 0x103c8, 0x103cf },
+    { 0x10400, 0x1049d },
+    { 0x10500, 0x10527 },
+    { 0x10530, 0x10563 },
+    { 0x10600, 0x10736 },
+    { 0x10740, 0x10755 },
+    { 0x10760, 0x10767 },
+    { 0x10800, 0x10805 },
+    { 0x10808, 0x10808 },
+    { 0x1080a, 0x10835 },
+    { 0x10837, 0x10838 },
+    { 0x1083c, 0x1083c },
+    { 0x1083f, 0x10855 },
+    { 0x10860, 0x10876 },
+    { 0x10880, 0x1089e },
+    { 0x108e0, 0x108f2 },
+    { 0x108f4, 0x108f5 },
+    { 0x10900, 0x10915 },
+    { 0x10920, 0x10939 },
+    { 0x10980, 0x109b7 },
+    { 0x109be, 0x109bf },
+    { 0x10a00, 0x10a00 },
+    { 0x10a10, 0x10a13 },
+    { 0x10a15, 0x10a17 },
+    { 0x10a19, 0x10a33 },
+    { 0x10a60, 0x10a7c },
+    { 0x10a80, 0x10a9c },
+    { 0x10ac0, 0x10ac7 },
+    { 0x10ac9, 0x10ae4 },
+    { 0x10b00, 0x10b35 },
+    { 0x10b40, 0x10b55 },
+    { 0x10b60, 0x10b72 },
+    { 0x10b80, 0x10b91 },
+    { 0x10c00, 0x10c48 },
+    { 0x10c80, 0x10cb2 },
+    { 0x10cc0, 0x10cf2 },
+    { 0x11003, 0x11037 },
+    { 0x11083, 0x110af },
+    { 0x110d0, 0x110e8 },
+    { 0x11103, 0x11126 },
+    { 0x11150, 0x11172 },
+    { 0x11176, 0x11176 },
+    { 0x11183, 0x111b2 },
+    { 0x111c1, 0x111c4 },
+    { 0x111da, 0x111da },
+    { 0x111dc, 0x111dc },
+    { 0x11200, 0x11211 },
+    { 0x11213, 0x1122b },
+    { 0x11280, 0x11286 },
+    { 0x11288, 0x11288 },
+    { 0x1128a, 0x1128d },
+    { 0x1128f, 0x1129d },
+    { 0x1129f, 0x112a8 },
+    { 0x112b0, 0x112de },
+    { 0x11305, 0x1130c },
+    { 0x1130f, 0x11310 },
+    { 0x11313, 0x11328 },
+    { 0x1132a, 0x11330 },
+    { 0x11332, 0x11333 },
+    { 0x11335, 0x11339 },
+    { 0x1133d, 0x1133d },
+    { 0x11350, 0x11350 },
+    { 0x1135d, 0x11361 },
+    { 0x11480, 0x114af },
+    { 0x114c4, 0x114c5 },
+    { 0x114c7, 0x114c7 },
+    { 0x11580, 0x115ae },
+    { 0x115d8, 0x115db },
+    { 0x11600, 0x1162f },
+    { 0x11644, 0x11644 },
+    { 0x11680, 0x116aa },
+    { 0x11700, 0x11719 },
+    { 0x118a0, 0x118df },
+    { 0x118ff, 0x118ff },
+    { 0x11ac0, 0x11af8 },
+    { 0x12000, 0x12399 },
+    { 0x12480, 0x12543 },
+    { 0x13000, 0x1342e },
+    { 0x14400, 0x14646 },
+    { 0x16800, 0x16a38 },
+    { 0x16a40, 0x16a5e },
+    { 0x16ad0, 0x16aed },
+    { 0x16b00, 0x16b2f },
+    { 0x16b40, 0x16b43 },
+    { 0x16b63, 0x16b77 },
+    { 0x16b7d, 0x16b8f },
+    { 0x16f00, 0x16f44 },
+    { 0x16f50, 0x16f50 },
+    { 0x16f93, 0x16f9f },
+    { 0x1b000, 0x1b001 },
+    { 0x1bc00, 0x1bc6a },
+    { 0x1bc70, 0x1bc7c },
+    { 0x1bc80, 0x1bc88 },
+    { 0x1bc90, 0x1bc99 },
+    { 0x1d400, 0x1d454 },
+    { 0x1d456, 0x1d49c },
+    { 0x1d49e, 0x1d49f },
+    { 0x1d4a2, 0x1d4a2 },
+    { 0x1d4a5, 0x1d4a6 },
+    { 0x1d4a9, 0x1d4ac },
+    { 0x1d4ae, 0x1d4b9 },
+    { 0x1d4bb, 0x1d4bb },
+    { 0x1d4bd, 0x1d4c3 },
+    { 0x1d4c5, 0x1d505 },
+    { 0x1d507, 0x1d50a },
+    { 0x1d50d, 0x1d514 },
+    { 0x1d516, 0x1d51c },
+    { 0x1d51e, 0x1d539 },
+    { 0x1d53b, 0x1d53e },
+    { 0x1d540, 0x1d544 },
+    { 0x1d546, 0x1d546 },
+    { 0x1d54a, 0x1d550 },
+    { 0x1d552, 0x1d6a5 },
+    { 0x1d6a8, 0x1d6c0 },
+    { 0x1d6c2, 0x1d6da },
+    { 0x1d6dc, 0x1d6fa },
+    { 0x1d6fc, 0x1d714 },
+    { 0x1d716, 0x1d734 },
+    { 0x1d736, 0x1d74e },
+    { 0x1d750, 0x1d76e },
+    { 0x1d770, 0x1d788 },
+    { 0x1d78a, 0x1d7a8 },
+    { 0x1d7aa, 0x1d7c2 },
+    { 0x1d7c4, 0x1d7cb },
+    { 0x1e800, 0x1e8c4 },
+    { 0x1ee00, 0x1ee03 },
+    { 0x1ee05, 0x1ee1f },
+    { 0x1ee21, 0x1ee22 },
+    { 0x1ee24, 0x1ee24 },
+    { 0x1ee27, 0x1ee27 },
+    { 0x1ee29, 0x1ee32 },
+    { 0x1ee34, 0x1ee37 },
+    { 0x1ee39, 0x1ee39 },
+    { 0x1ee3b, 0x1ee3b },
+    { 0x1ee42, 0x1ee42 },
+    { 0x1ee47, 0x1ee47 },
+    { 0x1ee49, 0x1ee49 },
+    { 0x1ee4b, 0x1ee4b },
+    { 0x1ee4d, 0x1ee4f },
+    { 0x1ee51, 0x1ee52 },
+    { 0x1ee54, 0x1ee54 },
+    { 0x1ee57, 0x1ee57 },
+    { 0x1ee59, 0x1ee59 },
+    { 0x1ee5b, 0x1ee5b },
+    { 0x1ee5d, 0x1ee5d },
+    { 0x1ee5f, 0x1ee5f },
+    { 0x1ee61, 0x1ee62 },
+    { 0x1ee64, 0x1ee64 },
+    { 0x1ee67, 0x1ee6a },
+    { 0x1ee6c, 0x1ee72 },
+    { 0x1ee74, 0x1ee77 },
+    { 0x1ee79, 0x1ee7c },
+    { 0x1ee7e, 0x1ee7e },
+    { 0x1ee80, 0x1ee89 },
+    { 0x1ee8b, 0x1ee9b },
+    { 0x1eea1, 0x1eea3 },
+    { 0x1eea5, 0x1eea9 },
+    { 0x1eeab, 0x1eebb },
+    { 0x20000, 0x20000 },
+    { 0x2a6d6, 0x2a6d6 },
+    { 0x2a700, 0x2a700 },
+    { 0x2b734, 0x2b734 },
+    { 0x2b740, 0x2b740 },
+    { 0x2b81d, 0x2b81d },
+    { 0x2b820, 0x2b820 },
+    { 0x2cea1, 0x2cea1 },
+    { 0x2f800, 0x2fa1d }
+};
+
+extern int runeinrange(const void* a, const void* b);
+bool isalpharune(Rune ch) {
+    return (NULL != bsearch(&ch, alphas, 561, 2 * sizeof(Rune), &runeinrange));
+}
diff --git a/source/utf/digits.c b/source/utf/digits.c
new file mode 100644 (file)
index 0000000..405fab0
--- /dev/null
@@ -0,0 +1,60 @@
+#include <libc.h>
+
+static Rune digits[51][2] = {
+    { 0x30, 0x39 },
+    { 0x660, 0x669 },
+    { 0x6f0, 0x6f9 },
+    { 0x7c0, 0x7c9 },
+    { 0x966, 0x96f },
+    { 0x9e6, 0x9ef },
+    { 0xa66, 0xa6f },
+    { 0xae6, 0xaef },
+    { 0xb66, 0xb6f },
+    { 0xbe6, 0xbef },
+    { 0xc66, 0xc6f },
+    { 0xce6, 0xcef },
+    { 0xd66, 0xd6f },
+    { 0xde6, 0xdef },
+    { 0xe50, 0xe59 },
+    { 0xed0, 0xed9 },
+    { 0xf20, 0xf29 },
+    { 0x1040, 0x1049 },
+    { 0x1090, 0x1099 },
+    { 0x17e0, 0x17e9 },
+    { 0x1810, 0x1819 },
+    { 0x1946, 0x194f },
+    { 0x19d0, 0x19d9 },
+    { 0x1a80, 0x1a89 },
+    { 0x1a90, 0x1a99 },
+    { 0x1b50, 0x1b59 },
+    { 0x1bb0, 0x1bb9 },
+    { 0x1c40, 0x1c49 },
+    { 0x1c50, 0x1c59 },
+    { 0xa620, 0xa629 },
+    { 0xa8d0, 0xa8d9 },
+    { 0xa900, 0xa909 },
+    { 0xa9d0, 0xa9d9 },
+    { 0xa9f0, 0xa9f9 },
+    { 0xaa50, 0xaa59 },
+    { 0xabf0, 0xabf9 },
+    { 0xff10, 0xff19 },
+    { 0x104a0, 0x104a9 },
+    { 0x11066, 0x1106f },
+    { 0x110f0, 0x110f9 },
+    { 0x11136, 0x1113f },
+    { 0x111d0, 0x111d9 },
+    { 0x112f0, 0x112f9 },
+    { 0x114d0, 0x114d9 },
+    { 0x11650, 0x11659 },
+    { 0x116c0, 0x116c9 },
+    { 0x11730, 0x11739 },
+    { 0x118e0, 0x118e9 },
+    { 0x16a60, 0x16a69 },
+    { 0x16b50, 0x16b59 },
+    { 0x1d7ce, 0x1d7ff }
+};
+
+extern int runeinrange(const void* a, const void* b);
+bool isdigitrune(Rune ch) {
+    return (NULL != bsearch(&ch, digits, 51, 2 * sizeof(Rune), &runeinrange));
+}
diff --git a/source/utf/other.c b/source/utf/other.c
new file mode 100644 (file)
index 0000000..aef4f44
--- /dev/null
@@ -0,0 +1,35 @@
+#include <libc.h>
+
+static Rune other[26][2] = {
+    { 0xad, 0xad },
+    { 0x600, 0x605 },
+    { 0x61c, 0x61c },
+    { 0x6dd, 0x6dd },
+    { 0x70f, 0x70f },
+    { 0x180e, 0x180e },
+    { 0x200b, 0x200f },
+    { 0x202a, 0x202e },
+    { 0x2060, 0x2064 },
+    { 0x2066, 0x206f },
+    { 0xd800, 0xd800 },
+    { 0xdb7f, 0xdb80 },
+    { 0xdbff, 0xdc00 },
+    { 0xdfff, 0xe000 },
+    { 0xf8ff, 0xf8ff },
+    { 0xfeff, 0xfeff },
+    { 0xfff9, 0xfffb },
+    { 0x110bd, 0x110bd },
+    { 0x1bca0, 0x1bca3 },
+    { 0x1d173, 0x1d17a },
+    { 0xe0001, 0xe0001 },
+    { 0xe0020, 0xe007f },
+    { 0xf0000, 0xf0000 },
+    { 0xffffd, 0xffffd },
+    { 0x100000, 0x100000 },
+    { 0x10fffd, 0x10fffd }
+};
+
+extern int runeinrange(const void* a, const void* b);
+bool isotherrune(Rune ch) {
+    return (NULL != bsearch(&ch, other, 26, 2 * sizeof(Rune), &runeinrange));
+}
index 701d830597e567f3de7051cce2c93d561e240a9f..cafb0374b4f6dd21ecfd25bbedd527dfca24f45a 100644 (file)
@@ -24,23 +24,9 @@ Rune tobaserune(Rune ch)
     return 0;
 }
 
-
-
-
-bool isalpharune(Rune ch)
-{
-    return (isupperrune(ch) || islowerrune(ch));
-}
-
 bool isbaserune(Rune ch)
 {
     (void)ch;
     return false;
 }
 
-bool isdigitrune(Rune ch)
-{
-    (void)ch;
-    return false;
-}
-
index cdbecf1c57e7259ec23d47821750ec3c1cdafcfe..fd0863633177268e23fd28d4ed8cb20c1f3f4c6e 100755 (executable)
@@ -6,48 +6,51 @@ $types = {}
 # Map of of all rune types to lookup table name
 $typemap = {
   # Letter Types
-  "Lu" => :uppers,        # Upper Case
-  "Ll" => :lowers,        # Lower Case
-  "Lt" => :titles,        # Title Case
-  "LC" => :otherletters,  # Cased Letter
-  "Lm" => :otherletters,  # Modifier Letter
-  "Lo" => :otherletters,  # Other Letter
+  "Lu" => [:alphas,:uppers],       # Upper Case
+  "Ll" => [:alphas,:lowers],       # Lower Case
+  "Lt" => [:alphas,:titles],       # Title Case
+  "LC" => [:alphas,:otherletters], # Cased Letter
+  "Lm" => [:alphas,:otherletters], # Modifier Letter
+  "Lo" => [:alphas,:otherletters], # Other Letter
   # Combining Marks
-  "Mn" => :marks,         # Non-Spacing Mark
-  "Mc" => :marks,         # Spacing Mark
-  "Me" => :marks,         # Enclosing Mark
+  "Mn" => [:marks],                # Non-Spacing Mark
+  "Mc" => [:marks],                # Spacing Mark
+  "Me" => [:marks],                # Enclosing Mark
   # Numbers
-  "Nd" => :numbers,       # Decimal Digit
-  "Nl" => :numbers,       # Letter Number
-  "No" => :numbers,       # Other Number
+  "Nd" => [:numbers,:digits],      # Decimal Digit
+  "Nl" => [:numbers],              # Letter Number
+  "No" => [:numbers],              # Other Number
   # Punctuation
-  "Pc" => :punctuation,   # Connector Punctuation
-  "Pd" => :punctuation,   # Dash Punctuation
-  "Ps" => :punctuation,   # Open Punctuation
-  "Pe" => :punctuation,   # Close Punctuation
-  "Pi" => :punctuation,   # Initial Punctuation
-  "Pf" => :punctuation,   # Final Punctuation
-  "Po" => :punctuation,   # Other Punctuation
+  "Pc" => [:punctuation],          # Connector Punctuation
+  "Pd" => [:punctuation],          # Dash Punctuation
+  "Ps" => [:punctuation],          # Open Punctuation
+  "Pe" => [:punctuation],          # Close Punctuation
+  "Pi" => [:punctuation],          # Initial Punctuation
+  "Pf" => [:punctuation],          # Final Punctuation
+  "Po" => [:punctuation],          # Other Punctuation
   # Symbols
-  "Sm" => :symbols,       # Math Symbol
-  "Sc" => :symbols,       # Currency Symbol
-  "Sk" => :symbols,       # Modifier Symbol
-  "So" => :symbols,       # Other Symbol
+  "Sm" => [:symbols],              # Math Symbol
+  "Sc" => [:symbols],              # Currency Symbol
+  "Sk" => [:symbols],              # Modifier Symbol
+  "So" => [:symbols],              # Other Symbol
   # Separator
-  "Zs" => :spaces,        # Space Separator
-  "Zl" => :spaces,        # Line Separator
-  "Zp" => :spaces,        # Paragraph Separator
+  "Zs" => [:spaces],               # Space Separator
+  "Zl" => [:spaces],               # Line Separator
+  "Zp" => [:spaces],               # Paragraph Separator
   # Other
-  "Cc" => :controls,      # Control
-  "Cf" => :other,         # Format
-  "Cs" => :other,         # Surrogate
-  "Co" => :other,         # Private Use
-  "Cn" => :other          # Unassigned
+  "Cc" => [:controls],             # Control
+  "Cf" => [:other],                # Format
+  "Cs" => [:other],                # Surrogate
+  "Co" => [:other],                # Private Use
+  "Cn" => [:other]                 # Unassigned
 }
+$typenames
 
-def register_codepoint(type, val)
-  $types[type] ||= []
-  $types[type] << val
+def register_codepoint(types, val)
+  types.each do |type|
+    $types[type] ||= []
+    $types[type] << val
+  end
 end
 
 if ARGV.length != 2 then
@@ -59,20 +62,20 @@ end
 unicode_data = File.open(ARGV[0],"r")
 unicode_data.each_line do |data|
   fields = data.split(';')
-  type  = $typemap[fields[2]]
-  stype = fields[4]
-  val   = fields[0].to_i(16)
+  types  = $typemap[fields[2]]
+  stype  = fields[4]
+  val    = fields[0].to_i(16)
 
   if (stype == "WS") || (stype == "S") || (stype == "B")
-    register_codepoint(:spaces, val)
-  elsif type == :uppers
-    register_codepoint(type, val)
-     register_codepoint(:tolowers, (fields[13] == "") ? val : fields[13].to_i(16))
-  elsif type == :lowers
-    register_codepoint(type, val)
-    register_codepoint(:touppers, (fields[14] == "") ? val : fields[14].to_i(16))
+    register_codepoint([:spaces], val)
+  elsif types.include? :uppers
+    register_codepoint(types, val)
+    register_codepoint([:tolowers], (fields[13] == "") ? val : fields[13].to_i(16))
+  elsif types.include? :lowers
+    register_codepoint(types, val)
+    register_codepoint([:touppers], (fields[14] == "") ? val : fields[14].to_i(16))
   else
-    register_codepoint(type, val)
+    register_codepoint(types, val)
   end
 end
 unicode_data.close()
@@ -97,27 +100,25 @@ def generate_typecheck_func(type, count)
 end
 
 def generate_type_table(type, altcase = [])
-  ranges = get_ranges($types[type])
-  pairs  = ranges.map{|r| "{ 0x#{r.first.to_s(16)}, 0x#{r.last.to_s(16)} }" }.join(",\n    ")
-  File.open("#{ARGV[1]}/#{type.to_s}.c", "w") do |f|
-    f.puts("#include <libc.h>\n\n")
-    f.puts("static Rune #{type.to_s}[#{ranges.length}][2] = {")
-    f.print('    ')
-    f.puts(pairs)
-    f.print("};\n\n");
-    f.print(generate_typecheck_func(type, ranges.length))
+  if $types[type]
+    ranges = get_ranges($types[type])
+    pairs  = ranges.map{|r| "{ 0x#{r.first.to_s(16)}, 0x#{r.last.to_s(16)} }" }.join(",\n    ")
+    File.open("#{ARGV[1]}/#{type.to_s}.c", "w") do |f|
+      f.puts("#include <libc.h>\n\n")
+      f.puts("static Rune #{type.to_s}[#{ranges.length}][2] = {")
+      f.print('    ')
+      f.puts(pairs)
+      f.print("};\n\n");
+      f.print(generate_typecheck_func(type, ranges.length))
+    end
   end
 end
 
 FileUtils.mkdir_p ARGV[1]
 generate_type_table(:uppers, :tolowers)
 generate_type_table(:lowers, :touppers)
-generate_type_table(:titles)
-generate_type_table(:otherletters)
-generate_type_table(:marks)
-generate_type_table(:numbers)
-generate_type_table(:punctuation)
-generate_type_table(:symbols)
-generate_type_table(:spaces)
-generate_type_table(:controls)
+alltypes = $typemap.values.flatten.uniq - [:uppers, :lowers]
+alltypes.each do |type|
+    generate_type_table(type)
+end