From a2976d8e80587017dd29a4d8c233e2e8488f348e Mon Sep 17 00:00:00 2001 From: "Mike D. Lowis" Date: Tue, 22 Sep 2015 15:54:24 -0400 Subject: [PATCH] Added very crappy, very primitive test case for verifying the is*rune functions behave as expected --- build.rb | 4 +- source/utf/alphas.c | 4 +- source/utf/controls.c | 12 ++-- source/utf/digits.c | 2 + source/utf/lowers.c | 4 +- source/utf/marks.c | 4 +- source/utf/numbers.c | 4 +- source/utf/other.c | 4 +- source/utf/otherletters.c | 4 +- source/utf/punctuation.c | 4 +- source/utf/runecmp.c | 9 +++ source/utf/spaces.c | 29 +++++++-- source/utf/symbols.c | 4 +- source/utf/titles.c | 4 +- source/utf/tolower.c | 4 +- source/utf/totitle.c | 4 +- source/utf/toupper.c | 4 +- source/utf/uppers.c | 4 +- tests/main.c | 1 + tests/utf/test_unicodedata.c | 121 +++++++++++++++++++++++++++++++++++ tools/unicode.rb | 57 ++++++++++++----- 21 files changed, 241 insertions(+), 46 deletions(-) create mode 100644 source/utf/runecmp.c create mode 100644 tests/utf/test_unicodedata.c diff --git a/build.rb b/build.rb index 54b0eb8..14f5d74 100755 --- a/build.rb +++ b/build.rb @@ -6,7 +6,7 @@ require './modules/build-system/setup' #------------------------------------------------------------------------------ # Define the default compiler environment main_env = BuildEnv.new do |env| - env["CFLAGS"] += ['-O3', '-Wall', '-Wextra', '--std=c99', '--pedantic'] + env["CFLAGS"] += ['-g', '-O3', '-Wall', '-Wextra', '--std=c99', '--pedantic'] env["CPPPATH"] += Dir['source/', 'modules/atf/source'] end @@ -21,6 +21,6 @@ main_env.Library('libc.a', FileList['source/**/*.c']) #------------------------------------------------------------------------------ if Opts[:profile].include? "test" main_env.Program('test_libc', Dir["tests/**/*.c", "modules/atf/source/*.c"] + ['./libc.a']) - main_env.Command('Unit Tests', 'test_libc', "CMD" => './test_libc') + main_env.Command('Unit Tests', 'test_libc', "CMD" => ['./test_libc']) end diff --git a/source/utf/alphas.c b/source/utf/alphas.c index 2befd89..d925069 100644 --- a/source/utf/alphas.c +++ b/source/utf/alphas.c @@ -567,9 +567,11 @@ static Rune ranges[433][2] = { { 0x2f800, 0x2fa1d } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool isalpharune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 128, sizeof(Rune), &runeinrange)) || + return ((NULL != bsearch(&ch, singles, 128, sizeof(Rune), &runecmp)) || (NULL != bsearch(&ch, ranges, 433, 2 * sizeof(Rune), &runeinrange))); } diff --git a/source/utf/controls.c b/source/utf/controls.c index 33fae2d..afa07d5 100644 --- a/source/utf/controls.c +++ b/source/utf/controls.c @@ -1,14 +1,14 @@ #include -static Rune ranges[4][2] = { - { 0x0, 0x8 }, - { 0xe, 0x1b }, - { 0x7f, 0x84 }, - { 0x86, 0x9f } +static Rune ranges[2][2] = { + { 0x0, 0x1f }, + { 0x7f, 0x9f } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool iscontrolrune(Rune ch) { - return (NULL != bsearch(&ch, ranges, 4, 2 * sizeof(Rune), &runeinrange)); + return (NULL != bsearch(&ch, ranges, 2, 2 * sizeof(Rune), &runeinrange)); } diff --git a/source/utf/digits.c b/source/utf/digits.c index ea1df48..a6dcc28 100644 --- a/source/utf/digits.c +++ b/source/utf/digits.c @@ -54,6 +54,8 @@ static Rune ranges[51][2] = { { 0x1d7ce, 0x1d7ff } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool isdigitrune(Rune ch) { diff --git a/source/utf/lowers.c b/source/utf/lowers.c index 00cf736..7dd1cc3 100644 --- a/source/utf/lowers.c +++ b/source/utf/lowers.c @@ -636,9 +636,11 @@ static Rune ranges[98][2] = { { 0x1d7c4, 0x1d7c9 } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool islowerrune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 532, sizeof(Rune), &runeinrange)) || + return ((NULL != bsearch(&ch, singles, 532, sizeof(Rune), &runecmp)) || (NULL != bsearch(&ch, ranges, 98, 2 * sizeof(Rune), &runeinrange))); } diff --git a/source/utf/marks.c b/source/utf/marks.c index 3101b7d..fd34500 100644 --- a/source/utf/marks.c +++ b/source/utf/marks.c @@ -242,9 +242,11 @@ static Rune ranges[191][2] = { { 0xe0100, 0xe01ef } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool ismarkrune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 45, sizeof(Rune), &runeinrange)) || + return ((NULL != bsearch(&ch, singles, 45, sizeof(Rune), &runecmp)) || (NULL != bsearch(&ch, ranges, 191, 2 * sizeof(Rune), &runeinrange))); } diff --git a/source/utf/numbers.c b/source/utf/numbers.c index 1f02213..095c131 100644 --- a/source/utf/numbers.c +++ b/source/utf/numbers.c @@ -117,9 +117,11 @@ static Rune ranges[105][2] = { { 0x1f100, 0x1f10c } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool isnumberrune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 6, sizeof(Rune), &runeinrange)) || + return ((NULL != bsearch(&ch, singles, 6, sizeof(Rune), &runecmp)) || (NULL != bsearch(&ch, ranges, 105, 2 * sizeof(Rune), &runeinrange))); } diff --git a/source/utf/other.c b/source/utf/other.c index 483b46c..e52b168 100644 --- a/source/utf/other.c +++ b/source/utf/other.c @@ -32,9 +32,11 @@ static Rune ranges[12][2] = { { 0xe0020, 0xe007f } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool isotherrune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 14, sizeof(Rune), &runeinrange)) || + return ((NULL != bsearch(&ch, singles, 14, sizeof(Rune), &runecmp)) || (NULL != bsearch(&ch, ranges, 12, 2 * sizeof(Rune), &runeinrange))); } diff --git a/source/utf/otherletters.c b/source/utf/otherletters.c index 3f65ac2..207ba9c 100644 --- a/source/utf/otherletters.c +++ b/source/utf/otherletters.c @@ -476,9 +476,11 @@ static Rune ranges[354][2] = { { 0x2f800, 0x2fa1d } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool isotherletterrune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 116, sizeof(Rune), &runeinrange)) || + return ((NULL != bsearch(&ch, singles, 116, sizeof(Rune), &runecmp)) || (NULL != bsearch(&ch, ranges, 354, 2 * sizeof(Rune), &runeinrange))); } diff --git a/source/utf/punctuation.c b/source/utf/punctuation.c index e74a546..3708fb9 100644 --- a/source/utf/punctuation.c +++ b/source/utf/punctuation.c @@ -167,9 +167,11 @@ static Rune ranges[106][2] = { { 0x1da87, 0x1da8b } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool ispunctuationrune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 55, sizeof(Rune), &runeinrange)) || + return ((NULL != bsearch(&ch, singles, 55, sizeof(Rune), &runecmp)) || (NULL != bsearch(&ch, ranges, 106, 2 * sizeof(Rune), &runeinrange))); } diff --git a/source/utf/runecmp.c b/source/utf/runecmp.c new file mode 100644 index 0000000..f42f2c9 --- /dev/null +++ b/source/utf/runecmp.c @@ -0,0 +1,9 @@ +#include + +/* Used by rune type checking functions to find the rune in the type tables */ +int runecmp(const void* a, const void* b) { + Rune* ra = (Rune*)a; + Rune* rb = (Rune*)b; + return *ra - *rb; +} + diff --git a/source/utf/spaces.c b/source/utf/spaces.c index ef76084..1edcf2f 100644 --- a/source/utf/spaces.c +++ b/source/utf/spaces.c @@ -1,24 +1,43 @@ #include -static Rune singles[6] = { +static Rune singles[14] = { + 0x20, 0x85, 0xa0, 0x1680, + 0x1680, + 0x2000, + 0x200a, + 0x2028, + 0x2029, 0x202f, 0x205f, + 0x205f, + 0x3000, 0x3000 }; -static Rune ranges[4][2] = { +static Rune ranges[13][2] = { { 0x9, 0xd }, { 0x1c, 0x20 }, - { 0x2000, 0x200a }, + { 0x2000, 0x2001 }, + { 0x2001, 0x2002 }, + { 0x2002, 0x2003 }, + { 0x2003, 0x2004 }, + { 0x2004, 0x2005 }, + { 0x2005, 0x2006 }, + { 0x2006, 0x2007 }, + { 0x2007, 0x2008 }, + { 0x2008, 0x2009 }, + { 0x2009, 0x200a }, { 0x2028, 0x2029 } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool isspacerune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 6, sizeof(Rune), &runeinrange)) || - (NULL != bsearch(&ch, ranges, 4, 2 * sizeof(Rune), &runeinrange))); + return ((NULL != bsearch(&ch, singles, 14, sizeof(Rune), &runecmp)) || + (NULL != bsearch(&ch, ranges, 13, 2 * sizeof(Rune), &runeinrange))); } diff --git a/source/utf/symbols.c b/source/utf/symbols.c index 91e244b..b6576a5 100644 --- a/source/utf/symbols.c +++ b/source/utf/symbols.c @@ -220,9 +220,11 @@ static Rune ranges[144][2] = { { 0x1f980, 0x1f984 } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool issymbolrune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 70, sizeof(Rune), &runeinrange)) || + return ((NULL != bsearch(&ch, singles, 70, sizeof(Rune), &runecmp)) || (NULL != bsearch(&ch, ranges, 144, 2 * sizeof(Rune), &runeinrange))); } diff --git a/source/utf/titles.c b/source/utf/titles.c index ed43eff..df2cced 100644 --- a/source/utf/titles.c +++ b/source/utf/titles.c @@ -16,9 +16,11 @@ static Rune ranges[3][2] = { { 0x1fa8, 0x1faf } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool istitlerune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 7, sizeof(Rune), &runeinrange)) || + return ((NULL != bsearch(&ch, singles, 7, sizeof(Rune), &runecmp)) || (NULL != bsearch(&ch, ranges, 3, 2 * sizeof(Rune), &runeinrange))); } diff --git a/source/utf/tolower.c b/source/utf/tolower.c index d2052a0..e48e2e3 100644 --- a/source/utf/tolower.c +++ b/source/utf/tolower.c @@ -1236,9 +1236,9 @@ static Rune mappings[1233][2] = { { 0x118bf, 0x118df }, }; -extern int runeinrange(const void* a, const void* b); +extern int runecmp(const void* a, const void* b); Rune tolowerrune(Rune ch) { - Rune* to = bsearch(&ch, mappings, 1233, 2 * sizeof(Rune), &runeinrange); + Rune* to = bsearch(&ch, mappings, 1233, 2 * sizeof(Rune), &runecmp); return (to == NULL) ? ch : to[1]; } diff --git a/source/utf/totitle.c b/source/utf/totitle.c index d6831d5..c208dc5 100644 --- a/source/utf/totitle.c +++ b/source/utf/totitle.c @@ -1248,9 +1248,9 @@ static Rune mappings[1245][2] = { { 0x118df, 0x118bf }, }; -extern int runeinrange(const void* a, const void* b); +extern int runecmp(const void* a, const void* b); Rune totitlerune(Rune ch) { - Rune* to = bsearch(&ch, mappings, 1245, 2 * sizeof(Rune), &runeinrange); + Rune* to = bsearch(&ch, mappings, 1245, 2 * sizeof(Rune), &runecmp); return (to == NULL) ? ch : to[1]; } diff --git a/source/utf/toupper.c b/source/utf/toupper.c index e785b40..c646f43 100644 --- a/source/utf/toupper.c +++ b/source/utf/toupper.c @@ -1244,9 +1244,9 @@ static Rune mappings[1241][2] = { { 0x118df, 0x118bf }, }; -extern int runeinrange(const void* a, const void* b); +extern int runecmp(const void* a, const void* b); Rune toupperrune(Rune ch) { - Rune* to = bsearch(&ch, mappings, 1241, 2 * sizeof(Rune), &runeinrange); + Rune* to = bsearch(&ch, mappings, 1241, 2 * sizeof(Rune), &runecmp); return (to == NULL) ? ch : to[1]; } diff --git a/source/utf/uppers.c b/source/utf/uppers.c index 66fae33..87f4a70 100644 --- a/source/utf/uppers.c +++ b/source/utf/uppers.c @@ -631,9 +631,11 @@ static Rune ranges[86][2] = { { 0x1d790, 0x1d7a8 } }; +extern int runecmp(const void* a, const void* b); + extern int runeinrange(const void* a, const void* b); bool isupperrune(Rune ch) { - return ((NULL != bsearch(&ch, singles, 539, sizeof(Rune), &runeinrange)) || + return ((NULL != bsearch(&ch, singles, 539, sizeof(Rune), &runecmp)) || (NULL != bsearch(&ch, ranges, 86, 2 * sizeof(Rune), &runeinrange))); } diff --git a/tests/main.c b/tests/main.c index 15ae8f2..483ec68 100644 --- a/tests/main.c +++ b/tests/main.c @@ -8,5 +8,6 @@ int main(int argc, char** argv) RUN_EXTERN_TEST_SUITE(RefCount); RUN_EXTERN_TEST_SUITE(SList); RUN_EXTERN_TEST_SUITE(BSTree); + RUN_EXTERN_TEST_SUITE(UnicodeData); return (PRINT_TEST_RESULTS()); } diff --git a/tests/utf/test_unicodedata.c b/tests/utf/test_unicodedata.c new file mode 100644 index 0000000..9028eb7 --- /dev/null +++ b/tests/utf/test_unicodedata.c @@ -0,0 +1,121 @@ +// Unit Test Framework Includes +#include "atf.h" + +// File To Test +#include + +const char database[] = "./tools/UnicodeData-8.0.0.txt"; + +char line[1024] = {0}; + +typedef struct { + uint32_t value; + char* gencat; + char* bidircat; + uint32_t tolower; + uint32_t toupper; + uint32_t totitle; +} codept_t; + +typedef bool (*isrune_fn_t)(Rune); + +typedef struct { + char* category; + isrune_fn_t* fntable; +} cp_table_t; + +const cp_table_t runetypes[] = { + { "Lu", (isrune_fn_t[]){ isalpharune, isupperrune, NULL } }, + { "Ll", (isrune_fn_t[]){ isalpharune, islowerrune, NULL } }, + { "Lt", (isrune_fn_t[]){ isalpharune, istitlerune, NULL } }, + { "LC", (isrune_fn_t[]){ isalpharune, isotherletterrune, NULL } }, + { "Lm", (isrune_fn_t[]){ isalpharune, isotherletterrune, NULL } }, + { "Lo", (isrune_fn_t[]){ isalpharune, isotherletterrune, NULL } }, + { "Mn", (isrune_fn_t[]){ ismarkrune, NULL } }, + { "Mc", (isrune_fn_t[]){ ismarkrune, NULL } }, + { "Me", (isrune_fn_t[]){ ismarkrune, NULL } }, + { "Nd", (isrune_fn_t[]){ isnumberrune, isdigitrune, NULL } }, + { "Nl", (isrune_fn_t[]){ isnumberrune, NULL } }, + { "No", (isrune_fn_t[]){ isnumberrune, NULL } }, + { "Pc", (isrune_fn_t[]){ ispunctuationrune, NULL } }, + { "Pd", (isrune_fn_t[]){ ispunctuationrune, NULL } }, + { "Ps", (isrune_fn_t[]){ ispunctuationrune, NULL } }, + { "Pe", (isrune_fn_t[]){ ispunctuationrune, NULL } }, + { "Pi", (isrune_fn_t[]){ ispunctuationrune, NULL } }, + { "Pf", (isrune_fn_t[]){ ispunctuationrune, NULL } }, + { "Po", (isrune_fn_t[]){ ispunctuationrune, NULL } }, + { "Sm", (isrune_fn_t[]){ issymbolrune, NULL } }, + { "Sc", (isrune_fn_t[]){ issymbolrune, NULL } }, + { "Sk", (isrune_fn_t[]){ issymbolrune, NULL } }, + { "So", (isrune_fn_t[]){ issymbolrune, NULL } }, + { "Zs", (isrune_fn_t[]){ isspacerune, NULL } }, + { "Zl", (isrune_fn_t[]){ isspacerune, NULL } }, + { "Zp", (isrune_fn_t[]){ isspacerune, NULL } }, + { "Cc", (isrune_fn_t[]){ iscontrolrune, NULL } }, + { "Cf", (isrune_fn_t[]){ isotherrune, NULL } }, + { "Cs", (isrune_fn_t[]){ isotherrune, NULL } }, + { "Co", (isrune_fn_t[]){ isotherrune, NULL } }, + { "Cn", (isrune_fn_t[]){ isotherrune, NULL } }, + { NULL, (isrune_fn_t[]){ isotherrune, NULL } } +}; + +isrune_fn_t* lookup_codepoint(codept_t* cp) { + cp_table_t* types = &runetypes[0]; + while((*types).category) { + if (0 == strcmp((*types).category, cp->gencat)) + return (*types).fntable; + types++; + } + return NULL; +} + +bool read_codepoint(codept_t* ch, FILE* db) { + if (fgets(line, 1023, db)) { + ch->value = strtoul(strtok(line, ";"), NULL, 16); + strtok(NULL, ";"); // Description + ch->gencat = strdup(strtok(NULL, ";")); // General Category + strtok(NULL, ";"); // Class + ch->bidircat = strdup(strtok(NULL, ";")); // Bidirectional Category + strtok(NULL, ";"); // Decompositional Map + strtok(NULL, ";"); // Decimal Value + strtok(NULL, ";"); // Digit Value + strtok(NULL, ";"); // Numeric Value + strtok(NULL, ";"); // Mirrored + strtok(NULL, ";"); // UTF 1.0 Name + strtok(NULL, ";"); // Comment + strtok(NULL, ";"); // To Upper + strtok(NULL, ";"); // To Lower + strtok(NULL, ";"); // To Title + return true; + } + return false; +} + +void free_codepoint(codept_t* cp) { + free(cp->gencat); + free(cp->bidircat); +} + +//----------------------------------------------------------------------------- +// Begin Unit Tests +//----------------------------------------------------------------------------- +TEST_SUITE(UnicodeData) { + TEST(Verify_runetype_functions) { + bool test_passing = true; + codept_t cp; + FILE* db = fopen(database, "r"); + while(test_passing && read_codepoint(&cp, db)) { + isrune_fn_t* fns = lookup_codepoint(&cp); + while(fns && *fns) { + bool test_passing = (*fns)(cp.value); + if (!test_passing) { + fprintf(stderr, "Failed to handle codepoint: %x\n", cp.value); + break; + } + fns++; + } + free_codepoint(&cp); + } + fclose(db); + } +} diff --git a/tools/unicode.rb b/tools/unicode.rb index 4ddfb59..5c13c37 100755 --- a/tools/unicode.rb +++ b/tools/unicode.rb @@ -3,12 +3,9 @@ require 'fileutils' require 'stringio' -# Check if we have the right number of arguments, bail otherwise -if ARGV.length != 2 then - puts "Usage: unicode.rb DATABASE OUTDIR" - puts "\nError: Incorrect number of arguments" - exit 1 -end +Database = "./tools/UnicodeData-8.0.0.txt" +OutputDir = "./source/utf" +TestFile = "./tests/utf/test_unicodedata.c" # Struct definition for representing a unicode character UnicodeChar = Struct.new( @@ -74,6 +71,9 @@ $typemap = { "Cn" => [:other] # Unassigned } +# List of generated test cases for each codepoint +$tests = {} + #------------------------------------------------------------------------------ # Register a character in the designated type tables combining adjacent @@ -89,18 +89,24 @@ def register_codepoint(types, val) else $types[type] << val end + $tests[val.value] = [] + $tests[val.value] << "is#{type.to_s.gsub(/s$/,'')}rune(#{val.value})" + $tests[val.value] << "#{val.tolower} == tolowerrune(#{val.value})" if val.tolower > 0 + $tests[val.value] << "#{val.toupper} == toupperrune(#{val.value})" if val.toupper > 0 + $tests[val.value] << "#{val.totitle} == totitlerune(#{val.value})" if val.totitle > 0 end end # Generate a rune type checking function using the singles and ranges tables def generate_typecheck_func(type, numranges, numsingles) out = StringIO.new + out.print "extern int runecmp(const void* a, const void* b);\n\n" out.print "extern int runeinrange(const void* a, const void* b);\n\n" out.print "bool is#{type.to_s.gsub(/s$/,'')}rune(Rune ch) {\n" if numsingles == 0 out.print " return (NULL != bsearch(&ch, ranges, #{numranges}, 2 * sizeof(Rune), &runeinrange));\n" else - out.print " return ((NULL != bsearch(&ch, singles, #{numsingles}, sizeof(Rune), &runeinrange)) || \n" + out.print " return ((NULL != bsearch(&ch, singles, #{numsingles}, sizeof(Rune), &runecmp)) || \n" out.print " (NULL != bsearch(&ch, ranges, #{numranges}, 2 * sizeof(Rune), &runeinrange)));\n" end out.print "}\n" @@ -110,8 +116,8 @@ end # Generates rune type tables organized by singles and ranges def generate_type_tables(type) ranges, singles = $types[type].partition {|e| e.kind_of? Array } - puts "Generating #{ARGV[1]}/#{type.to_s}.c" - File.open("#{ARGV[1]}/#{type.to_s}.c", "w") do |f| + puts "Generating #{OutputDir}/#{type.to_s}.c" + File.open("#{OutputDir}/#{type.to_s}.c", "w") do |f| f.puts("#include \n\n") table = singles.map{|e| "0x#{e.value.to_s(16)}" }.join(",\n ") f.print("static Rune singles[#{singles.length}] = {\n #{table}\n};\n\n") if singles.length > 0 @@ -123,9 +129,9 @@ end def generate_to_func(type, tblsz) out = StringIO.new - out.print "extern int runeinrange(const void* a, const void* b);\n\n" + out.print "extern int runecmp(const void* a, const void* b);\n\n" out.print "Rune #{type}rune(Rune ch) {\n" - out.print " Rune* to = bsearch(&ch, mappings, #{tblsz}, 2 * sizeof(Rune), &runeinrange);\n" + out.print " Rune* to = bsearch(&ch, mappings, #{tblsz}, 2 * sizeof(Rune), &runecmp);\n" out.print " return (to == NULL) ? ch : to[1];\n" out.print "}\n" out.string @@ -133,8 +139,8 @@ end def generate_to_table(type) mappings = $types[:all].select{|e| e[type] > 0 } - puts "Generating #{ARGV[1]}/#{type.to_s}.c" - File.open("#{ARGV[1]}/#{type.to_s}.c", "w") do |f| + puts "Generating #{OutputDir}/#{type.to_s}.c" + File.open("#{OutputDir}/#{type.to_s}.c", "w") do |f| f.print "#include \n\n" f.print "static Rune mappings[#{mappings.length}][2] = {\n" mappings.each do |e| @@ -145,10 +151,27 @@ def generate_to_table(type) end end +def generate_test_file() + puts "Generating #{TestFile}" + File.open(TestFile, "w") do |f| + f.print("#include \n") + f.print("#include \n") + f.print("\nTEST_SUITE(UnicodeData) {\n") + $tests.each_pair do |codept,tests| + if codept < 250000 + f.print(" TEST(Codepoint_#{codept.to_s(16)}) {\n") + tests.each {|t| f.print " CHECK(#{t});\n" } + f.print(" }\n") + end + end + f.print("}\n") + end +end + #------------------------------------------------------------------------------ # Read in the unicode character database and sort it into type classes -unicode_data = File.open(ARGV[0],"r") +unicode_data = File.open(Database,"r") unicode_data.each_line do |data| char = UnicodeChar.new(*data.split(';')) char.value = char.value.to_i(16) @@ -160,18 +183,18 @@ unicode_data.each_line do |data| $types[:all] << char if (bicat == "WS") || (bicat == "S") || (bicat == "B") register_codepoint([:spaces], char) - else - register_codepoint(types, char) end + register_codepoint(types, char) end unicode_data.close() # Generate the runetype files into the designated directory -FileUtils.mkdir_p ARGV[1] +FileUtils.mkdir_p OutputDir $typemap.values.flatten.uniq.each do |type| generate_type_tables(type) end generate_to_table(:tolower) generate_to_table(:toupper) generate_to_table(:totitle) +#generate_test_file() -- 2.52.0