From 42329dc2db5f0f8d098a49b4dd1161be8102b322 Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Thu, 14 Dec 2017 16:14:55 -0500 Subject: [PATCH] First pass at utf8 support. single font, no alternative lookups --- lib/buf.ml | 38 ++++++++++++++++++------------- lib/buf.mli | 20 ++++++++++------ lib/rope.ml | 64 ++++++++++++++++++++++++++++++++++++---------------- lib/rope.mli | 8 ++++++- 4 files changed, 86 insertions(+), 44 deletions(-) diff --git a/lib/buf.ml b/lib/buf.ml index 8803fc2..9de0a6a 100644 --- a/lib/buf.ml +++ b/lib/buf.ml @@ -62,7 +62,7 @@ let length buf = Rope.length buf.rope let iteri fn buf i = - Rope.iteri fn buf.rope i + Rope.each_rune fn buf.rope i let iter fn buf i = iteri (fun i c -> (fn c)) buf i @@ -73,10 +73,10 @@ let make_lexer buf = scanfn = buf.lexfn; lexbuf = Lexing.from_function (fun bytebuf n -> let count = ref 0 in - iteri (fun i c -> + Rope.iteri (fun i c -> Bytes.set bytebuf !count (Char.chr c); incr count; - (!count >= n)) buf !pos; + (!count >= n)) buf.rope !pos; pos := !pos + !count; !count) }) @@ -92,6 +92,17 @@ module Cursor = struct let make buf idx = { start = 0; stop = (Rope.limit_index buf.rope idx) } + let move_to dest buf csr = + csr.stop <- (match dest with + | StartOfLine -> Rope.to_bol buf.rope csr.stop + | EndOfLine -> Rope.to_eol buf.rope csr.stop + | NextChar -> Rope.nextc buf.rope csr.stop + | PrevChar -> Rope.prevc buf.rope csr.stop + | NextLine -> Rope.nextln buf.rope csr.stop + | PrevLine -> Rope.prevln buf.rope csr.stop + ); + csr.stop +(* let clone csr = { start = csr.start; stop = csr.stop } @@ -107,17 +118,6 @@ module Cursor = struct let getc buf csr = Rope.getc buf.rope csr.stop - let move_to dest buf csr = - csr.stop <- (match dest with - | StartOfLine -> Rope.to_bol buf.rope csr.stop - | EndOfLine -> Rope.to_eol buf.rope csr.stop - | NextChar -> Rope.nextc buf.rope csr.stop - | PrevChar -> Rope.prevc buf.rope csr.stop - | NextLine -> Rope.nextln buf.rope csr.stop - | PrevLine -> Rope.prevln buf.rope csr.stop - ); - csr.stop - let nextc = move_to NextChar let prevc = move_to PrevChar let nextln = move_to NextLine @@ -133,15 +133,18 @@ module Cursor = struct let is_bol = is_at StartOfLine let is_eol = is_at EndOfLine +*) end let move_to dest buf i = Cursor.move_to dest buf (Cursor.make buf i) -let nextc = move_to NextChar -let prevc = move_to PrevChar let nextln = move_to NextLine let prevln = move_to PrevLine let bol = move_to StartOfLine + +(* +let nextc = move_to NextChar +let prevc = move_to PrevChar let eol = move_to EndOfLine let eol = move_to EndOfLine @@ -149,3 +152,6 @@ let is_at dest buf i = Cursor.is_at dest buf (Cursor.make buf i) let is_bol = is_at StartOfLine let is_eol = is_at EndOfLine +*) + + diff --git a/lib/buf.mli b/lib/buf.mli index a6ddd72..8462596 100644 --- a/lib/buf.mli +++ b/lib/buf.mli @@ -5,6 +5,7 @@ type dest = | NextChar | PrevChar | NextLine | PrevLine +(* module Cursor : sig type t @@ -15,11 +16,11 @@ module Cursor : sig val goto : buf -> t -> int -> unit val getc : buf -> t -> int -(* +( * val putc : buf -> t -> int -> unit val gets : buf -> t -> string val puts : buf -> t -> string -> unit -*) +* ) val move_to : dest -> buf -> t -> int val nextc : buf -> t -> int @@ -33,25 +34,30 @@ module Cursor : sig val is_bol : buf -> t -> bool val is_eol : buf -> t -> bool end +*) val empty : t val load : string -> t val path : t -> string val length : t -> int -val iter : (int -> bool) -> t -> int -> unit val iteri : (int -> int -> bool) -> t -> int -> unit +val iter : (int -> bool) -> t -> int -> unit +val make_lexer : t -> Colormap.lexer -val move_to : dest -> t -> int -> int -val nextc : t -> int -> int -val prevc : t -> int -> int val nextln : t -> int -> int val prevln : t -> int -> int val bol : t -> int -> int + +(* +val move_to : dest -> t -> int -> int +val nextc : t -> int -> int +val prevc : t -> int -> int + val eol : t -> int -> int val is_at : dest -> t -> int -> bool val is_bol : t -> int -> bool val is_eol : t -> int -> bool +*) -val make_lexer : t -> Colormap.lexer diff --git a/lib/rope.ml b/lib/rope.ml index 86ff9ef..ada0a4d 100644 --- a/lib/rope.ml +++ b/lib/rope.ml @@ -44,31 +44,55 @@ let rec getc rope i = else getc r (i - left_len) +(* UTF-8 **********************************************************************) + +exception Return of int + +let utf8_seqbits = [| 0x00; 0x80; 0xC0; 0xE0; 0xF0; 0xF8; 0xFC; 0xFE |] +let utf8_seqlens = [| 0x01; 0x00; 0x02; 0x03; 0x04; 0x05; 0x06; 0x00 |] +let utf8_seqmask = [| 0x00; 0xFF; 0x1F; 0x0F; 0x07; 0x03; 0x01; 0x00 |] + +let is_cont_byte c = + ((c land 0xC0) == 0x80) + +let utfseq byte = + try + for i = 1 to 8 do + if ((byte land utf8_seqbits.(i)) = utf8_seqbits.(i-1)) then + raise (Return utf8_seqlens.(i-1)) + done; + raise (Return 1) + with Return v -> v + +let rec getr rope i = + let pos = ref i and rune = ref 0 in + while ((!pos > 0) && (is_cont_byte (getc rope !pos))) do + pos := !pos - 1 + done; + let byte = (getc rope !pos) in + let len = ref (utfseq byte) in + rune := byte land utf8_seqmask.(!len); + pos := !pos + 1; + while !len > 1 do + rune := (!rune lsl 6) lor ((getc rope !pos) land 0x3F); + pos := !pos + 1; + len := !len - 1; + done; + (!rune, !pos) + +let rec each_rune fn rope pos = + if pos < (length rope) then + let rune, next = getr rope pos in + if (fn pos rune) then + each_rune fn rope next + +(******************************************************************************) + (* inefficient form of iteri *) let rec iteri fn rope pos = if pos < (length rope) && (fn pos (getc rope pos)) then iteri fn rope (pos + 1) -(* More efficient form of iteri? -exception Break_loop - -let iteri_leaf fn pos str off len = - let offset = pos - off in - for i = off to off + len - 1 do - if (fn (i + offset) (Char.code str.[i])) == false then - raise Break_loop - done - -let rec iteri fn rope pos = - match rope with - | Leaf (str, off, len) -> - (try iteri_leaf fn pos str off len - with Break_loop -> ()) - | Node (l,r,_,_) -> - iteri fn l pos; - iteri fn r (pos + (length l)) -*) - let gets rope i j = let buf = Bytes.create (j - i) in iteri diff --git a/lib/rope.mli b/lib/rope.mli index 17c7948..5255852 100644 --- a/lib/rope.mli +++ b/lib/rope.mli @@ -16,20 +16,24 @@ val height : rope -> int val limit_index : rope -> int -> int val last : rope -> int -val flatten : rope -> rope val join : rope -> rope -> rope +val flatten : rope -> rope val split : rope -> int -> (rope * rope) val del : rope -> int -> int -> rope val iteri : (int -> rune -> bool) -> rope -> int -> unit +val each_rune : (int -> rune -> bool) -> rope -> int -> unit val getc : rope -> int -> rune +(* val putc : rope -> int -> rune -> rope +*) val gets : rope -> int -> int -> string val puts : rope -> string -> int -> rope val nextc : rope -> int -> int val prevc : rope -> int -> int + val nextln : rope -> int -> int val prevln : rope -> int -> int @@ -38,3 +42,5 @@ val is_eol : rope -> int -> bool val to_bol : rope -> int -> int val to_eol : rope -> int -> int + + -- 2.52.0