Cleanup unicode handling a bit

author Mike Lowis <mike.lowis@gentex.com>

Thu, 6 Oct 2016 16:58:11 +0000 (12:58 -0400)

committer Mike Lowis <mike.lowis@gentex.com>

Thu, 6 Oct 2016 16:58:11 +0000 (12:58 -0400)
author Mike Lowis <mike.lowis@gentex.com>
Thu, 6 Oct 2016 16:58:11 +0000 (12:58 -0400)
committer Mike Lowis <mike.lowis@gentex.com>
Thu, 6 Oct 2016 16:58:11 +0000 (12:58 -0400)
diff --git a/buf.c b/buf.c

index f8b874314e38a36a61a2a9684fbe4e0a0f30e688..f119fa9b511489b198a5b7afda117b917d3cea70 100644 (file)
--- a/buf.c
+++ b/buf.c
@@ -11,13 +11,10 @@ void buf_load(Buf* buf, char* path)
  {
      buf->insert_mode = true;
      unsigned i = 0;
+    Rune r;
      FILE* in = (!strcmp(path,"-") ? stdin : fopen(path, "rb"));
-    while (EOF != fpeekc(in)) {
-        size_t len = 0;
-        Rune r = 0;
-        while (!utf8decode(&r, &len, fgetc(in)));
+    while (RUNE_EOF != (r = fgetrune(in)))
          buf_ins(buf, i++, r);
-    }
      fclose(in);
      buf->insert_mode = false;
  }
diff --git a/edit.h b/edit.h

index bd6e009955c15e46bd8f676cbc5a05a7ba66bfcf..d1596510d836ceaa56b82ecc686fde5959edd258 100644 (file)
--- a/edit.h
+++ b/edit.h
@@ -7,11 +7,11 @@
  /* UTF-8 Handling
   *****************************************************************************/
  enum {
-    UTF_MAX   = 6u,       /* maximum number of bytes that make up a rune */
-    RUNE_SELF = 0x80,     /* byte values larger than this are *not* ascii */
-    RUNE_ERR  = 0xFFFD,   /* rune value representing an error */
-    RUNE_MAX  = 0x10FFFF, /* Maximum decodable rune value */
-    RUNE_EOF  = EOF       /* ruen value representing end of file */
+    UTF_MAX   = 6u,        /* maximum number of bytes that make up a rune */
+    RUNE_SELF = 0x80,      /* byte values at or above this are *not* ascii */
+    RUNE_ERR  = 0xFFFD,    /* rune value representing an error */
+    RUNE_MAX  = 0x10FFFF,  /* Maximum decodable rune value */
+    RUNE_EOF  = UINT32_MAX /* rune value representing end of file */
  };
  
  /* Represents a unicode code point */
@@ -19,6 +19,8 @@ typedef uint32_t Rune;
  
  size_t utf8encode(char str[UTF_MAX], Rune rune);
  bool utf8decode(Rune* rune, size_t* length, int byte);
+Rune fgetrune(FILE* f);
+void fputrune(Rune rune, FILE* f);
  
  /* Input Handling
   *****************************************************************************/
diff --git a/utf8.c b/utf8.c

index 02074a27d209690c2268b24a424ed0d2a19261d8..6c301e05f64962dc2408c82548e0411fd9533089 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -9,14 +9,14 @@ const uint8_t UTF8_SeqBits[] = { 0x00u, 0x80u, 0xC0u, 0xE0u, 0xF0u, 0xF8u, 0xFCu
  const uint8_t UTF8_SeqMask[] = { 0x00u, 0xFFu, 0x1Fu, 0x0Fu, 0x07u, 0x03u, 0x01u, 0x00u };
  const uint8_t UTF8_SeqLens[] = { 0x01u, 0x00u, 0x02u, 0x03u, 0x04u, 0x05u, 0x06u, 0x00u };
  
-bool runevalid(Rune val) {
+static bool runevalid(Rune val) {
      return (val <= RUNE_MAX)
          && ((val & 0xFFFEu) != 0xFFFEu)
          && ((val < 0xD800u) || (val > 0xDFFFu))
          && ((val < 0xFDD0u) || (val > 0xFDEFu));
  }
  
-size_t runelen(Rune rune) {
+static size_t runelen(Rune rune) {
      if(!runevalid(rune))
          return 0;
      else if(rune <= 0x7F)
@@ -29,7 +29,7 @@ size_t runelen(Rune rune) {
          return 4;
  }
  
-uint8_t utfseq(uint8_t byte) {
+static uint8_t utfseq(uint8_t byte) {
      for (int i = 1; i < 8; i++)
          if ((byte & UTF8_SeqBits[i]) == UTF8_SeqBits[i-1])
              return UTF8_SeqLens[i-1];
@@ -74,14 +74,6 @@ bool utf8decode(Rune* rune, size_t* length, int byte) {
      return ((*length == 0) || (*rune == RUNE_ERR));
  }
  
-size_t utflen(const char* s) {
-    size_t len = 0;
-    Rune rune = 0;
-    while (*s && !utf8decode(&rune, &len, *(s++)))
-        len++;
-    return len;
-}
-
  Rune fgetrune(FILE* f) {
      Rune rune = 0;
      size_t length = 0;
author	Mike Lowis <mike.lowis@gentex.com>
	Thu, 6 Oct 2016 16:58:11 +0000 (12:58 -0400)
committer	Mike Lowis <mike.lowis@gentex.com>
	Thu, 6 Oct 2016 16:58:11 +0000 (12:58 -0400)
buf.c		patch \| blob \| history
edit.h		patch \| blob \| history
utf8.c		patch \| blob \| history