let utfseq byte =
try
- for i = 1 to 8 do
+ for i = 1 to 7 do
if ((byte land utf8_seqbits.(i)) = utf8_seqbits.(i-1)) then
raise (Return utf8_seqlens.(i-1))
done;
raise (Return 1)
with Return v -> v
-let rec getr rope i =
- let pos = ref i and rune = ref 0 in
- while ((!pos > 0) && (is_cont_byte (getc rope !pos))) do
- pos := !pos - 1
- done;
- let byte = (getc rope !pos) in
- let len = ref (utfseq byte) in
- rune := byte land utf8_seqmask.(!len);
- pos := !pos + 1;
- while !len > 1 do
- rune := (!rune lsl 6) lor ((getc rope !pos) land 0x3F);
- pos := !pos + 1;
- len := !len - 1;
- done;
- (!rune, !pos)
+let rec utfbeg rope pos =
+ if (pos > 0) && (is_cont_byte (getc rope pos)) then
+ utfbeg rope (pos - 1)
+ else
+ pos
+
+let rec decode rope i len rune =
+ let byte = (getc rope i) in
+ if not (is_cont_byte byte) then
+ (0xFFFD, i + 1)
+ else if len == 0 then
+ (rune, i + 1)
+ else
+ decode rope (i + 1) (len - 1) ((rune lsl 6) lor (byte land 0x3F))
+
+let get_rune rope i =
+ let byte = (getc rope i) in
+ if byte < 128 then
+ (byte, i+1)
+ else if byte >= 245 || byte == 192 || byte == 193 then
+ (0xFFFD, i+1)
+ else
+ let byte = (getc rope i) in
+ let len = (utfseq byte) in
+ try decode rope (i + 1) len (byte land utf8_seqmask.(len))
+ with _ -> (0xFFFD, i + 1)
-let rec each_rune fn rope pos =
+let rec each_rune_rec fn rope pos =
if pos < (length rope) then
- let rune, next = getr rope pos in
+ let rune, next = get_rune rope pos in
if (fn pos rune) then
- each_rune fn rope next
+ each_rune_rec fn rope next
+
+let rec each_rune fn rope pos =
+ each_rune_rec fn rope (utfbeg rope pos)
(******************************************************************************)