From: Mike Lowis Date: Thu, 6 Oct 2016 16:58:11 +0000 (-0400) Subject: Cleanup unicode handling a bit X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=e7b4f269f6306d5493804b9ab879db627b336663;p=projs%2Ftide.git Cleanup unicode handling a bit --- diff --git a/buf.c b/buf.c index f8b8743..f119fa9 100644 --- a/buf.c +++ b/buf.c @@ -11,13 +11,10 @@ void buf_load(Buf* buf, char* path) { buf->insert_mode = true; unsigned i = 0; + Rune r; FILE* in = (!strcmp(path,"-") ? stdin : fopen(path, "rb")); - while (EOF != fpeekc(in)) { - size_t len = 0; - Rune r = 0; - while (!utf8decode(&r, &len, fgetc(in))); + while (RUNE_EOF != (r = fgetrune(in))) buf_ins(buf, i++, r); - } fclose(in); buf->insert_mode = false; } diff --git a/edit.h b/edit.h index bd6e009..d159651 100644 --- a/edit.h +++ b/edit.h @@ -7,11 +7,11 @@ /* UTF-8 Handling *****************************************************************************/ enum { - UTF_MAX = 6u, /* maximum number of bytes that make up a rune */ - RUNE_SELF = 0x80, /* byte values larger than this are *not* ascii */ - RUNE_ERR = 0xFFFD, /* rune value representing an error */ - RUNE_MAX = 0x10FFFF, /* Maximum decodable rune value */ - RUNE_EOF = EOF /* ruen value representing end of file */ + UTF_MAX = 6u, /* maximum number of bytes that make up a rune */ + RUNE_SELF = 0x80, /* byte values larger than this are *not* ascii */ + RUNE_ERR = 0xFFFD, /* rune value representing an error */ + RUNE_MAX = 0x10FFFF, /* Maximum decodable rune value */ + RUNE_EOF = UINT32_MAX /* ruen value representing end of file */ }; /* Represents a unicode code point */ @@ -19,6 +19,8 @@ typedef uint32_t Rune; size_t utf8encode(char str[UTF_MAX], Rune rune); bool utf8decode(Rune* rune, size_t* length, int byte); +Rune fgetrune(FILE* f); +void fputrune(Rune rune, FILE* f); /* Input Handling *****************************************************************************/ diff --git a/utf8.c b/utf8.c index 02074a2..6c301e0 100644 --- a/utf8.c +++ b/utf8.c @@ -9,14 +9,14 @@ const uint8_t UTF8_SeqBits[] = { 0x00u, 0x80u, 0xC0u, 0xE0u, 0xF0u, 0xF8u, 0xFCu const uint8_t UTF8_SeqMask[] = { 0x00u, 0xFFu, 0x1Fu, 0x0Fu, 0x07u, 0x03u, 0x01u, 0x00u }; const uint8_t UTF8_SeqLens[] = { 0x01u, 0x00u, 0x02u, 0x03u, 0x04u, 0x05u, 0x06u, 0x00u }; -bool runevalid(Rune val) { +static bool runevalid(Rune val) { return (val <= RUNE_MAX) && ((val & 0xFFFEu) != 0xFFFEu) && ((val < 0xD800u) || (val > 0xDFFFu)) && ((val < 0xFDD0u) || (val > 0xFDEFu)); } -size_t runelen(Rune rune) { +static size_t runelen(Rune rune) { if(!runevalid(rune)) return 0; else if(rune <= 0x7F) @@ -29,7 +29,7 @@ size_t runelen(Rune rune) { return 4; } -uint8_t utfseq(uint8_t byte) { +static uint8_t utfseq(uint8_t byte) { for (int i = 1; i < 8; i++) if ((byte & UTF8_SeqBits[i]) == UTF8_SeqBits[i-1]) return UTF8_SeqLens[i-1]; @@ -74,14 +74,6 @@ bool utf8decode(Rune* rune, size_t* length, int byte) { return ((*length == 0) || (*rune == RUNE_ERR)); } -size_t utflen(const char* s) { - size_t len = 0; - Rune rune = 0; - while (*s && !utf8decode(&rune, &len, *(s++))) - len++; - return len; -} - Rune fgetrune(FILE* f) { Rune rune = 0; size_t length = 0;