#------------------------------------------------------------------------------
# Define the default compiler environment
+# Default build environment: this change adds '-g' to CFLAGS (conventionally
+# the compiler's debug-information switch) next to the existing optimisation
+# and warning flags; the include path covers the library and ATF sources.
main_env = BuildEnv.new do |env|
- env["CFLAGS"] += ['-O3', '-Wall', '-Wextra', '--std=c99', '--pedantic']
+ env["CFLAGS"] += ['-g', '-O3', '-Wall', '-Wextra', '--std=c99', '--pedantic']
env["CPPPATH"] += Dir['source/', 'modules/atf/source']
end
#------------------------------------------------------------------------------
+# Only when the "test" profile is selected: build test_libc from every C file
+# under tests/ plus the ATF sources, linked with ./libc.a, then register a
+# 'Unit Tests' command that runs the resulting binary. The command line is now
+# passed as an array instead of a single string.
if Opts[:profile].include? "test"
main_env.Program('test_libc', Dir["tests/**/*.c", "modules/atf/source/*.c"] + ['./libc.a'])
- main_env.Command('Unit Tests', 'test_libc', "CMD" => './test_libc')
+ main_env.Command('Unit Tests', 'test_libc', "CMD" => ['./test_libc'])
end
{ 0x2f800, 0x2fa1d }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool isalpharune(Rune ch) {
- return ((NULL != bsearch(&ch, singles, 128, sizeof(Rune), &runeinrange)) ||
+ return ((NULL != bsearch(&ch, singles, 128, sizeof(Rune), &runecmp)) ||
(NULL != bsearch(&ch, ranges, 433, 2 * sizeof(Rune), &runeinrange)));
}
#include <carl.h>
-static Rune ranges[4][2] = {
- { 0x0, 0x8 },
- { 0xe, 0x1b },
- { 0x7f, 0x84 },
- { 0x86, 0x9f }
+static Rune ranges[2][2] = {
+ { 0x0, 0x1f },
+ { 0x7f, 0x9f }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool iscontrolrune(Rune ch) {
- return (NULL != bsearch(&ch, ranges, 4, 2 * sizeof(Rune), &runeinrange));
+ return (NULL != bsearch(&ch, ranges, 2, 2 * sizeof(Rune), &runeinrange));
}
{ 0x1d7ce, 0x1d7ff }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool isdigitrune(Rune ch) {
{ 0x1d7c4, 0x1d7c9 }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool islowerrune(Rune ch) {
- return ((NULL != bsearch(&ch, singles, 532, sizeof(Rune), &runeinrange)) ||
+ return ((NULL != bsearch(&ch, singles, 532, sizeof(Rune), &runecmp)) ||
(NULL != bsearch(&ch, ranges, 98, 2 * sizeof(Rune), &runeinrange)));
}
{ 0xe0100, 0xe01ef }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool ismarkrune(Rune ch) {
- return ((NULL != bsearch(&ch, singles, 45, sizeof(Rune), &runeinrange)) ||
+ return ((NULL != bsearch(&ch, singles, 45, sizeof(Rune), &runecmp)) ||
(NULL != bsearch(&ch, ranges, 191, 2 * sizeof(Rune), &runeinrange)));
}
{ 0x1f100, 0x1f10c }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool isnumberrune(Rune ch) {
- return ((NULL != bsearch(&ch, singles, 6, sizeof(Rune), &runeinrange)) ||
+ return ((NULL != bsearch(&ch, singles, 6, sizeof(Rune), &runecmp)) ||
(NULL != bsearch(&ch, ranges, 105, 2 * sizeof(Rune), &runeinrange)));
}
{ 0xe0020, 0xe007f }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool isotherrune(Rune ch) {
- return ((NULL != bsearch(&ch, singles, 14, sizeof(Rune), &runeinrange)) ||
+ return ((NULL != bsearch(&ch, singles, 14, sizeof(Rune), &runecmp)) ||
(NULL != bsearch(&ch, ranges, 12, 2 * sizeof(Rune), &runeinrange)));
}
{ 0x2f800, 0x2fa1d }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool isotherletterrune(Rune ch) {
- return ((NULL != bsearch(&ch, singles, 116, sizeof(Rune), &runeinrange)) ||
+ return ((NULL != bsearch(&ch, singles, 116, sizeof(Rune), &runecmp)) ||
(NULL != bsearch(&ch, ranges, 354, 2 * sizeof(Rune), &runeinrange)));
}
{ 0x1da87, 0x1da8b }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool ispunctuationrune(Rune ch) {
- return ((NULL != bsearch(&ch, singles, 55, sizeof(Rune), &runeinrange)) ||
+ return ((NULL != bsearch(&ch, singles, 55, sizeof(Rune), &runecmp)) ||
(NULL != bsearch(&ch, ranges, 106, 2 * sizeof(Rune), &runeinrange)));
}
--- /dev/null
+#include <carl.h>
+
+/* Comparison callback handed to bsearch() by the rune type checking and
+ * case mapping functions.
+ *
+ * a: pointer to the Rune being searched for.
+ * b: pointer to a table element; only the first Rune stored there is read,
+ *    so the same callback serves plain Rune tables and { from, to } pairs.
+ *
+ * Returns a negative value, zero, or a positive value when *a is less than,
+ * equal to, or greater than *b.
+ *
+ * The result is built from two comparisons instead of `*ra - *rb`: a
+ * subtraction wraps around if Rune is an unsigned type and can overflow if
+ * it is signed, and either way the sign handed back to bsearch() could be
+ * wrong for operands that are far apart. */
+int runecmp(const void* a, const void* b) {
+    const Rune* ra = (const Rune*)a;
+    const Rune* rb = (const Rune*)b;
+    return (*ra > *rb) - (*ra < *rb);
+}
+
#include <carl.h>
static Rune singles[6] = {
    0x85,
    0xa0,
    0x1680,
    0x202f,
    0x205f,
    0x3000
};
static Rune ranges[4][2] = {
    { 0x9, 0xd },
    { 0x1c, 0x20 },
    { 0x2000, 0x200a },
    { 0x2028, 0x2029 }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
+/* Reports whether ch appears in the space tables above: singles holds
+ * isolated code points (searched with runecmp) and ranges holds
+ * { first, last } pairs (searched with runeinrange, presumably inclusive at
+ * both ends - confirm against its definition). Each code point is listed
+ * exactly once: values such as 0x20 or 0x2028 that a range already covers
+ * are not repeated in singles, and 0x2000-0x200a stays one range rather
+ * than a chain of overlapping two-element ranges. */
bool isspacerune(Rune ch) {
-    return ((NULL != bsearch(&ch, singles, 6, sizeof(Rune), &runeinrange)) ||
+    return ((NULL != bsearch(&ch, singles, 6, sizeof(Rune), &runecmp)) ||
            (NULL != bsearch(&ch, ranges, 4, 2 * sizeof(Rune), &runeinrange)));
}
{ 0x1f980, 0x1f984 }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool issymbolrune(Rune ch) {
- return ((NULL != bsearch(&ch, singles, 70, sizeof(Rune), &runeinrange)) ||
+ return ((NULL != bsearch(&ch, singles, 70, sizeof(Rune), &runecmp)) ||
(NULL != bsearch(&ch, ranges, 144, 2 * sizeof(Rune), &runeinrange)));
}
{ 0x1fa8, 0x1faf }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool istitlerune(Rune ch) {
- return ((NULL != bsearch(&ch, singles, 7, sizeof(Rune), &runeinrange)) ||
+ return ((NULL != bsearch(&ch, singles, 7, sizeof(Rune), &runecmp)) ||
(NULL != bsearch(&ch, ranges, 3, 2 * sizeof(Rune), &runeinrange)));
}
{ 0x118bf, 0x118df },
};
-extern int runeinrange(const void* a, const void* b);
+extern int runecmp(const void* a, const void* b);
Rune tolowerrune(Rune ch) {
- Rune* to = bsearch(&ch, mappings, 1233, 2 * sizeof(Rune), &runeinrange);
+ Rune* to = bsearch(&ch, mappings, 1233, 2 * sizeof(Rune), &runecmp);
return (to == NULL) ? ch : to[1];
}
{ 0x118df, 0x118bf },
};
-extern int runeinrange(const void* a, const void* b);
+extern int runecmp(const void* a, const void* b);
Rune totitlerune(Rune ch) {
- Rune* to = bsearch(&ch, mappings, 1245, 2 * sizeof(Rune), &runeinrange);
+ Rune* to = bsearch(&ch, mappings, 1245, 2 * sizeof(Rune), &runecmp);
return (to == NULL) ? ch : to[1];
}
{ 0x118df, 0x118bf },
};
-extern int runeinrange(const void* a, const void* b);
+extern int runecmp(const void* a, const void* b);
Rune toupperrune(Rune ch) {
- Rune* to = bsearch(&ch, mappings, 1241, 2 * sizeof(Rune), &runeinrange);
+ Rune* to = bsearch(&ch, mappings, 1241, 2 * sizeof(Rune), &runecmp);
return (to == NULL) ? ch : to[1];
}
{ 0x1d790, 0x1d7a8 }
};
+extern int runecmp(const void* a, const void* b);
+
extern int runeinrange(const void* a, const void* b);
bool isupperrune(Rune ch) {
- return ((NULL != bsearch(&ch, singles, 539, sizeof(Rune), &runeinrange)) ||
+ return ((NULL != bsearch(&ch, singles, 539, sizeof(Rune), &runecmp)) ||
(NULL != bsearch(&ch, ranges, 86, 2 * sizeof(Rune), &runeinrange)));
}
RUN_EXTERN_TEST_SUITE(RefCount);
RUN_EXTERN_TEST_SUITE(SList);
RUN_EXTERN_TEST_SUITE(BSTree);
+ RUN_EXTERN_TEST_SUITE(UnicodeData);
return (PRINT_TEST_RESULTS());
}
--- /dev/null
+// Unit Test Framework Includes
+#include "atf.h"
+
+// File To Test
+#include <carl.h>
+
+// Path of the Unicode character database opened by the test below
+const char database[] = "./tools/UnicodeData-8.0.0.txt";
+
+// Shared buffer that read_codepoint() fills with one database line at a time
+char line[1024] = {0};
+
+// One record read from the database. gencat and bidircat are heap copies
+// made by read_codepoint() and released by free_codepoint(). The tolower,
+// toupper and totitle members are declared but read_codepoint() never
+// assigns them.
+typedef struct {
+ uint32_t value;
+ char* gencat;
+ char* bidircat;
+ uint32_t tolower;
+ uint32_t toupper;
+ uint32_t totitle;
+} codept_t;
+
+// Signature shared by the is*rune() predicates under test
+typedef bool (*isrune_fn_t)(Rune);
+
+// Associates a general category string with a NULL-terminated array of
+// predicates
+typedef struct {
+ char* category;
+ isrune_fn_t* fntable;
+} cp_table_t;
+
+// Predicates to run for each general category string. Every fntable is a
+// NULL-terminated compound literal. The final entry has a NULL category and
+// marks the end of the table: lookup_codepoint() stops when it reaches it,
+// so that entry's fntable is never returned.
+const cp_table_t runetypes[] = {
+ { "Lu", (isrune_fn_t[]){ isalpharune, isupperrune, NULL } },
+ { "Ll", (isrune_fn_t[]){ isalpharune, islowerrune, NULL } },
+ { "Lt", (isrune_fn_t[]){ isalpharune, istitlerune, NULL } },
+ { "LC", (isrune_fn_t[]){ isalpharune, isotherletterrune, NULL } },
+ { "Lm", (isrune_fn_t[]){ isalpharune, isotherletterrune, NULL } },
+ { "Lo", (isrune_fn_t[]){ isalpharune, isotherletterrune, NULL } },
+ { "Mn", (isrune_fn_t[]){ ismarkrune, NULL } },
+ { "Mc", (isrune_fn_t[]){ ismarkrune, NULL } },
+ { "Me", (isrune_fn_t[]){ ismarkrune, NULL } },
+ { "Nd", (isrune_fn_t[]){ isnumberrune, isdigitrune, NULL } },
+ { "Nl", (isrune_fn_t[]){ isnumberrune, NULL } },
+ { "No", (isrune_fn_t[]){ isnumberrune, NULL } },
+ { "Pc", (isrune_fn_t[]){ ispunctuationrune, NULL } },
+ { "Pd", (isrune_fn_t[]){ ispunctuationrune, NULL } },
+ { "Ps", (isrune_fn_t[]){ ispunctuationrune, NULL } },
+ { "Pe", (isrune_fn_t[]){ ispunctuationrune, NULL } },
+ { "Pi", (isrune_fn_t[]){ ispunctuationrune, NULL } },
+ { "Pf", (isrune_fn_t[]){ ispunctuationrune, NULL } },
+ { "Po", (isrune_fn_t[]){ ispunctuationrune, NULL } },
+ { "Sm", (isrune_fn_t[]){ issymbolrune, NULL } },
+ { "Sc", (isrune_fn_t[]){ issymbolrune, NULL } },
+ { "Sk", (isrune_fn_t[]){ issymbolrune, NULL } },
+ { "So", (isrune_fn_t[]){ issymbolrune, NULL } },
+ { "Zs", (isrune_fn_t[]){ isspacerune, NULL } },
+ { "Zl", (isrune_fn_t[]){ isspacerune, NULL } },
+ { "Zp", (isrune_fn_t[]){ isspacerune, NULL } },
+ { "Cc", (isrune_fn_t[]){ iscontrolrune, NULL } },
+ { "Cf", (isrune_fn_t[]){ isotherrune, NULL } },
+ { "Cs", (isrune_fn_t[]){ isotherrune, NULL } },
+ { "Co", (isrune_fn_t[]){ isotherrune, NULL } },
+ { "Cn", (isrune_fn_t[]){ isotherrune, NULL } },
+ { NULL, (isrune_fn_t[]){ isotherrune, NULL } }
+};
+
+/* Find the predicate list registered in runetypes for a code point's
+ * general category.
+ *
+ * cp: record whose gencat string is matched against each table entry.
+ *
+ * Returns the matching entry's NULL-terminated predicate array, or NULL when
+ * cp or cp->gencat is NULL or no entry has that category. The walk stops at
+ * the entry whose category is NULL. */
+isrune_fn_t* lookup_codepoint(codept_t* cp) {
+    /* runetypes is const, so the cursor must point to const as well */
+    const cp_table_t* entry;
+    if ((cp == NULL) || (cp->gencat == NULL))
+        return NULL;
+    for (entry = runetypes; entry->category != NULL; entry++) {
+        if (0 == strcmp(entry->category, cp->gencat))
+            return entry->fntable;
+    }
+    return NULL;
+}
+
+/* Copy a NUL-terminated field into freshly allocated storage. Returns NULL
+ * when str is NULL or the allocation fails. Used in place of strdup(), which
+ * is a POSIX function rather than part of ISO C99 (the build passes
+ * --std=c99 --pedantic). */
+static char* dup_field(const char* str) {
+    char* copy = NULL;
+    if (str != NULL) {
+        size_t len = strlen(str) + 1;
+        copy = malloc(len);
+        if (copy != NULL)
+            memcpy(copy, str, len);
+    }
+    return copy;
+}
+
+/* Read the next ';'-separated record from db into ch.
+ *
+ * ch: receives the code point value (parsed as hexadecimal) and heap copies
+ *     of the general category and bidirectional category; release them with
+ *     free_codepoint(). The tolower/toupper/totitle members are not touched.
+ * db: open database stream; a NULL stream is treated as end of input.
+ *
+ * Returns true when a record was read, false at end of input, on a line that
+ * has fewer than five tokens, or when memory runs out. On false ch owns no
+ * memory.
+ *
+ * strtok() never yields an empty token, so positions are only trustworthy
+ * while every preceding field is non-empty. This assumes the first five
+ * fields of each record are always populated - confirm against the database
+ * format. Fields after the fifth are not consumed because nothing uses them
+ * and their positions would shift whenever an earlier field is empty. */
+bool read_codepoint(codept_t* ch, FILE* db) {
+    char* value;
+    char* gencat;
+    char* bidircat;
+
+    ch->gencat = NULL;
+    ch->bidircat = NULL;
+    if ((db == NULL) || (NULL == fgets(line, sizeof(line), db)))
+        return false;
+
+    if (NULL == (value = strtok(line, ";")))     /* Code point value */
+        return false;
+    if (NULL == strtok(NULL, ";"))               /* Description */
+        return false;
+    if (NULL == (gencat = strtok(NULL, ";")))    /* General Category */
+        return false;
+    if (NULL == strtok(NULL, ";"))               /* Class */
+        return false;
+    if (NULL == (bidircat = strtok(NULL, ";")))  /* Bidirectional Category */
+        return false;
+
+    ch->value = strtoul(value, NULL, 16);
+    ch->gencat = dup_field(gencat);
+    ch->bidircat = dup_field(bidircat);
+    if ((ch->gencat == NULL) || (ch->bidircat == NULL)) {
+        /* Do not hand back a half-built record */
+        free(ch->gencat);
+        free(ch->bidircat);
+        ch->gencat = NULL;
+        ch->bidircat = NULL;
+        return false;
+    }
+    return true;
+}
+
+/* Release the strings owned by a record filled in by read_codepoint().
+ *
+ * cp: record to clean up; NULL is ignored. Both string members are reset to
+ *     NULL afterwards so that calling this twice on the same record, or
+ *     reading the members after cleanup, cannot touch freed memory. */
+void free_codepoint(codept_t* cp) {
+    if (cp == NULL)
+        return;
+    free(cp->gencat);
+    free(cp->bidircat);
+    cp->gencat = NULL;
+    cp->bidircat = NULL;
+}
+
+//-----------------------------------------------------------------------------
+// Begin Unit Tests
+//-----------------------------------------------------------------------------
+TEST_SUITE(UnicodeData) {
+    /* Walks the whole database and runs, for every record, each predicate
+     * that runetypes lists for the record's general category. The walk stops
+     * at the first predicate that rejects its code point; that code point is
+     * printed to stderr and the test is reported as failed through CHECK.
+     *
+     * NOTE(review): CHECK is assumed to be the ATF assertion macro (the table
+     * generator emits CHECK(...) calls into its generated tests) - confirm
+     * whether it returns from the test on failure. The code below is written
+     * to be safe either way: the stream is guarded separately and is closed
+     * before the final CHECK. */
+    TEST(Verify_runetype_functions) {
+        bool test_passing = true;
+        codept_t cp;
+        FILE* db = fopen(database, "r");
+        CHECK(db != NULL);
+        while ((db != NULL) && test_passing && read_codepoint(&cp, db)) {
+            isrune_fn_t* fns = lookup_codepoint(&cp);
+            /* Assign to the outer flag; redeclaring it here would leave the
+             * enclosing loop condition permanently true. */
+            for (; test_passing && fns && *fns; fns++)
+                test_passing = (*fns)(cp.value);
+            if (!test_passing)
+                fprintf(stderr, "Failed to handle codepoint: %x\n", cp.value);
+            free_codepoint(&cp);
+        }
+        if (db != NULL)
+            fclose(db);
+        CHECK(test_passing);
+    }
+}
require 'fileutils'
require 'stringio'
-# Check if we have the right number of arguments, bail otherwise
-if ARGV.length != 2 then
- puts "Usage: unicode.rb DATABASE OUTDIR"
- puts "\nError: Incorrect number of arguments"
- exit 1
-end
+Database = "./tools/UnicodeData-8.0.0.txt"
+OutputDir = "./source/utf"
+TestFile = "./tests/utf/test_unicodedata.c"
# Struct definition for representing a unicode character
UnicodeChar = Struct.new(
"Cn" => [:other] # Unassigned
}
+# List of generated test cases for each codepoint
+$tests = {}
+
#------------------------------------------------------------------------------
# Register a character in the designated type tables combining adjacent
else
$types[type] << val
end
+ $tests[val.value] = []
+ $tests[val.value] << "is#{type.to_s.gsub(/s$/,'')}rune(#{val.value})"
+ $tests[val.value] << "#{val.tolower} == tolowerrune(#{val.value})" if val.tolower > 0
+ $tests[val.value] << "#{val.toupper} == toupperrune(#{val.value})" if val.toupper > 0
+ $tests[val.value] << "#{val.totitle} == totitlerune(#{val.value})" if val.totitle > 0
end
end
# Generate a rune type checking function using the singles and ranges tables
def generate_typecheck_func(type, numranges, numsingles)
out = StringIO.new
+ out.print "extern int runecmp(const void* a, const void* b);\n\n"
out.print "extern int runeinrange(const void* a, const void* b);\n\n"
out.print "bool is#{type.to_s.gsub(/s$/,'')}rune(Rune ch) {\n"
if numsingles == 0
out.print " return (NULL != bsearch(&ch, ranges, #{numranges}, 2 * sizeof(Rune), &runeinrange));\n"
else
- out.print " return ((NULL != bsearch(&ch, singles, #{numsingles}, sizeof(Rune), &runeinrange)) || \n"
+ out.print " return ((NULL != bsearch(&ch, singles, #{numsingles}, sizeof(Rune), &runecmp)) || \n"
out.print " (NULL != bsearch(&ch, ranges, #{numranges}, 2 * sizeof(Rune), &runeinrange)));\n"
end
out.print "}\n"
# Generates rune type tables organized by singles and ranges
def generate_type_tables(type)
ranges, singles = $types[type].partition {|e| e.kind_of? Array }
- puts "Generating #{ARGV[1]}/#{type.to_s}.c"
- File.open("#{ARGV[1]}/#{type.to_s}.c", "w") do |f|
+ puts "Generating #{OutputDir}/#{type.to_s}.c"
+ File.open("#{OutputDir}/#{type.to_s}.c", "w") do |f|
f.puts("#include <carl.h>\n\n")
table = singles.map{|e| "0x#{e.value.to_s(16)}" }.join(",\n ")
f.print("static Rune singles[#{singles.length}] = {\n #{table}\n};\n\n") if singles.length > 0
def generate_to_func(type, tblsz)
out = StringIO.new
- out.print "extern int runeinrange(const void* a, const void* b);\n\n"
+ out.print "extern int runecmp(const void* a, const void* b);\n\n"
out.print "Rune #{type}rune(Rune ch) {\n"
- out.print " Rune* to = bsearch(&ch, mappings, #{tblsz}, 2 * sizeof(Rune), &runeinrange);\n"
+ out.print " Rune* to = bsearch(&ch, mappings, #{tblsz}, 2 * sizeof(Rune), &runecmp);\n"
out.print " return (to == NULL) ? ch : to[1];\n"
out.print "}\n"
out.string
def generate_to_table(type)
mappings = $types[:all].select{|e| e[type] > 0 }
- puts "Generating #{ARGV[1]}/#{type.to_s}.c"
- File.open("#{ARGV[1]}/#{type.to_s}.c", "w") do |f|
+ puts "Generating #{OutputDir}/#{type.to_s}.c"
+ File.open("#{OutputDir}/#{type.to_s}.c", "w") do |f|
f.print "#include <carl.h>\n\n"
f.print "static Rune mappings[#{mappings.length}][2] = {\n"
mappings.each do |e|
end
end
+# Writes the generated unit test source to TestFile: one TEST per code point
+# recorded in $tests whose value is below 250000, each containing one CHECK
+# line per expression stored for that code point.
+def generate_test_file()
+  puts "Generating #{TestFile}"
+  File.open(TestFile, "w") do |out|
+    preamble = "#include <atf.h>\n" + "#include <carl.h>\n" + "\nTEST_SUITE(UnicodeData) {\n"
+    out.print(preamble)
+    $tests.each_pair do |codept, checks|
+      # Code points at or above the limit get no test case
+      next unless codept < 250000
+      out.print(" TEST(Codepoint_#{codept.to_s(16)}) {\n")
+      checks.each do |check|
+        out.print(" CHECK(#{check});\n")
+      end
+      out.print(" }\n")
+    end
+    out.print("}\n")
+  end
+end
+
#------------------------------------------------------------------------------
# Read in the unicode character database and sort it into type classes
-unicode_data = File.open(ARGV[0],"r")
+unicode_data = File.open(Database,"r")
unicode_data.each_line do |data|
char = UnicodeChar.new(*data.split(';'))
char.value = char.value.to_i(16)
$types[:all] << char
if (bicat == "WS") || (bicat == "S") || (bicat == "B")
  register_codepoint([:spaces], char)
+  # A character with a space-like bidirectional category is still registered
+  # under the types of its general category, but :spaces is left out of that
+  # second call. Registering it twice puts the same code point into the space
+  # tables twice, which shows up as duplicate singles and overlapping ranges
+  # in the generated output.
+  # NOTE(review): assumes types is the Array of type symbols looked up for
+  # this character's general category - confirm where it is assigned.
+  register_codepoint(types - [:spaces], char)
else
  register_codepoint(types, char)
end
end
unicode_data.close()
# Generate the runetype files into the designated directory
-FileUtils.mkdir_p ARGV[1]
+FileUtils.mkdir_p OutputDir
$typemap.values.flatten.uniq.each do |type|
generate_type_tables(type)
end
generate_to_table(:tolower)
generate_to_table(:toupper)
generate_to_table(:totitle)
+#generate_test_file()