From: Michael D. Lowis Date: Fri, 15 Dec 2017 17:34:53 +0000 (-0500) Subject: fixed rope.ml to not choke on malformed utf-8 docs X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=2fcad17a32100d4c9ee5f0fc003dfae5eb3a9e37;p=archive%2Ftide-ocaml.git fixed rope.ml to not choke on malformed utf-8 docs --- diff --git a/lib/rope.ml b/lib/rope.ml index ada0a4d..c3c8006 100644 --- a/lib/rope.ml +++ b/lib/rope.ml @@ -57,34 +57,48 @@ let is_cont_byte c = let utfseq byte = try - for i = 1 to 8 do + for i = 1 to 7 do if ((byte land utf8_seqbits.(i)) = utf8_seqbits.(i-1)) then raise (Return utf8_seqlens.(i-1)) done; raise (Return 1) with Return v -> v -let rec getr rope i = - let pos = ref i and rune = ref 0 in - while ((!pos > 0) && (is_cont_byte (getc rope !pos))) do - pos := !pos - 1 - done; - let byte = (getc rope !pos) in - let len = ref (utfseq byte) in - rune := byte land utf8_seqmask.(!len); - pos := !pos + 1; - while !len > 1 do - rune := (!rune lsl 6) lor ((getc rope !pos) land 0x3F); - pos := !pos + 1; - len := !len - 1; - done; - (!rune, !pos) +let rec utfbeg rope pos = + if (pos > 0) && (is_cont_byte (getc rope pos)) then + utfbeg rope (pos - 1) + else + pos + +let rec decode rope i len rune = + let byte = (getc rope i) in + if not (is_cont_byte byte) then + (0xFFFD, i + 1) + else if len == 0 then + (rune, i + 1) + else + decode rope (i + 1) (len - 1) ((rune lsl 6) lor (byte land 0x3F)) + +let get_rune rope i = + let byte = (getc rope i) in + if byte < 128 then + (byte, i+1) + else if byte >= 245 || byte == 192 || byte == 193 then + (0xFFFD, i+1) + else + let byte = (getc rope i) in + let len = (utfseq byte) in + try decode rope (i + 1) len (byte land utf8_seqmask.(len)) + with _ -> (0xFFFD, i + 1) -let rec each_rune fn rope pos = +let rec each_rune_rec fn rope pos = if pos < (length rope) then - let rune, next = getr rope pos in + let rune, next = get_rune rope pos in if (fn pos rune) then - each_rune fn rope next + each_rune_rec fn rope next + +let rec each_rune fn rope pos = + each_rune_rec fn rope (utfbeg rope pos) (******************************************************************************)