From 317c396fd47f7f39b75e737e040da4a650d70bdc Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Tue, 19 Dec 2017 09:02:22 -0500 Subject: [PATCH] fixed some bugs in utf8 decoding --- lib/cfg.ml | 2 +- lib/rope.ml | 30 +++++++++++++++++------------- lib/rope.mli | 1 + tests/rope_tests.ml | 27 +++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/lib/cfg.ml b/lib/cfg.ml index a107b57..1f5a30c 100644 --- a/lib/cfg.ml +++ b/lib/cfg.ml @@ -27,7 +27,7 @@ let cmd_tags = strvar "tide.ui.font" "Quit Undo Redo Cut Copy Paste | Send Find " (* font settings *) -let font = strvar "tide.ui.tags.edit" "Verdana:size=11" +let font = strvar "tide.ui.tags.edit" "Verdana:size=10" let line_spacing = intvar "tide.ui.line_spacing" 1 (* user interface related options *) diff --git a/lib/rope.ml b/lib/rope.ml index 402a900..1daa943 100644 --- a/lib/rope.ml +++ b/lib/rope.ml @@ -70,26 +70,30 @@ let rec utfbeg rope pos = else pos +let runeerr msg = + print_endline msg; 0xFFFD + let rec decode rope i len rune = - let byte = (getc rope i) in - if len == 0 then - (rune, i) - else if not (is_cont_byte byte) then - (0xFFFD, i) + if len == 0 then (rune, i) else - decode rope (i + 1) (len - 1) ((rune lsl 6) lor (byte land 0x3F)) + let byte = (getc rope i) in + if not (is_cont_byte byte) then + (runeerr "missing cont. byte", i) + else + decode rope (i + 1) (len - 1) ((rune lsl 6) lor (byte land 0x3F)) let get_rune rope i = let byte = (getc rope i) in - if byte < 128 then - (byte, i+1) - else if byte >= 245 || byte == 192 || byte == 193 then - (0xFFFD, i+1) + if byte == 192 || byte == 193 then + (runeerr "invalid utf8 byte", i + 1) else - let byte = (getc rope i) in - let len = (utfseq byte) in + let byte = (getc rope i) and len = (utfseq byte) in try decode rope (i + 1) (len - 1) (byte land utf8_seqmask.(len)) - with _ -> (0xFFFD, i + 1) + with e -> + (runeerr "failure decoding", i + 1) + +let getr rope i = + let rune, next = get_rune rope i in rune let rec each_rune_rec fn rope pos = if pos < (length rope) then diff --git a/lib/rope.mli b/lib/rope.mli index 5255852..51389bd 100644 --- a/lib/rope.mli +++ b/lib/rope.mli @@ -25,6 +25,7 @@ val iteri : (int -> rune -> bool) -> rope -> int -> unit val each_rune : (int -> rune -> bool) -> rope -> int -> unit val getc : rope -> int -> rune +val getr : rope -> int -> rune (* val putc : rope -> int -> rune -> rope *) diff --git a/tests/rope_tests.ml b/tests/rope_tests.ml index 5cec045..4a78811 100644 --- a/tests/rope_tests.ml +++ b/tests/rope_tests.ml @@ -239,3 +239,30 @@ let () = (* to_eol() tests *) assert( (to_eol rope 1) == 4 ); ); () + +let () = + test "getr : " (fun () -> + let rope = from_string "\x7F" in + assert( (getr rope 0) == 0x7F ); + ); + test "getr : " (fun () -> + let rope = from_string "\xDF\xBF" in + assert( (getr rope 0) == 0x7FF ); + ); + test "getr : " (fun () -> + let rope = from_string "\xEF\xBF\xBF" in + assert( (getr rope 0) == 0xFFFF ); + ); + test "getr : " (fun () -> + let rope = from_string "\xF7\xBF\xBF\xBF" in + assert( (getr rope 0) == 0x1FFFFF ); + ); + test "getr : " (fun () -> + let rope = from_string "\xFB\xBF\xBF\xBF\xBF" in + assert( (getr rope 0) == 0x3FFFFFF ); + ); + test "getr : " (fun () -> + let rope = from_string "\xFD\xBF\xBF\xBF\xBF\xBF" in + assert( (getr rope 0) == 0x7FFFFFFF ); + ); + () -- 2.52.0