From 279fb2801922ed7ead93bc794c6de15d4925c735 Mon Sep 17 00:00:00 2001
From: "Michael D. Lowis"
Date: Mon, 11 Dec 2017 20:55:50 -0500
Subject: [PATCH] first attempt at extending lexers

---
Review notes (text in this section is not applied by git am):
 * buf.ml: the OCaml entry listed "mli" without the leading dot used by
   every other extension; it is now ".mli".
 * lex_ocaml.mll: comments nest in OCaml, so an inner comment no longer
   ends the colored range early.
 * lex_ruby.mll: replaced the rules copied from the C lexer with Ruby
   comments, literals and keywords.
 * Indentation was reconstructed; blob ids on the index lines are those of
   the original submission.

 lib/buf.ml               | 10 +++----
 lib/lexers/lex_ocaml.mll | 52 +++++++++++++++++++++++++++++++++++
 lib/lexers/lex_ruby.mll  | 58 ++++++++++++++++++++++++++++++++++++++++
 lib/lexers/lex_text.mll  |  9 +++++++
 4 files changed, 124 insertions(+), 5 deletions(-)
 create mode 100644 lib/lexers/lex_ocaml.mll
 create mode 100644 lib/lexers/lex_ruby.mll
 create mode 100644 lib/lexers/lex_text.mll

diff --git a/lib/buf.ml b/lib/buf.ml
index 63f8642..8803fc2 100644
--- a/lib/buf.ml
+++ b/lib/buf.ml
@@ -24,12 +24,12 @@ let filetypes = [
     exts = [".c"; ".h"; ".cpp"; ".hpp"; ".cc"; ".c++"; ".cxx"]
   };
   {
-    syntax = Lex_cpp.scan;
+    syntax = Lex_ruby.scan;
     names = ["Rakefile"; "rakefile"; "gpkgfile"];
     exts = [".rb"]
   };
   {
-    syntax = Lex_cpp.scan;
+    syntax = Lex_ocaml.scan;
     names = [];
-    exts = [".ml"; ".mll"; "mli"]
+    exts = [".ml"; ".mll"; ".mli"]
   }
@@ -43,10 +43,10 @@ let pick_syntax path =
     (List.exists ((=) ext) ftype.exts) in
   match (List.find_opt match_ftype filetypes) with
   | Some ft -> ft.syntax
-  | None -> Lex_cpp.scan
+  | None -> Lex_text.scan
 
 let empty =
-  { lexfn = Lex_cpp.scan;
+  { lexfn = Lex_text.scan;
     path = "";
     rope = Rope.empty }
 
diff --git a/lib/lexers/lex_ocaml.mll b/lib/lexers/lex_ocaml.mll
new file mode 100644
index 0000000..6e23b6c
--- /dev/null
+++ b/lib/lexers/lex_ocaml.mll
@@ -0,0 +1,52 @@
+{ open Colormap }
+
+(* Syntax-highlighting lexer for OCaml sources. Each call to [scan] colors at
+   most one token; the end of input is signalled by raising [Eof]. *)
+
+let oct = ['0'-'7']
+let dec = ['0'-'9']
+let hex = ['0'-'9' 'a'-'f' 'A'-'F']
+
+let alpha_ = ['a'-'z' 'A'-'Z' '_']
+let alnum_ = (alpha_ | dec)
+
+let identifier = alpha_ alnum_*
+let number = (dec+ | '0' ['o''O'] oct+ | '0' ['x''X'] hex+)
+let character = "'" ([^'\\' '\''] | '\\' _) "'"
+let string = '"' ([^'\\' '"'] | '\\' _)* '"'
+let typedef = ['A'-'Z'] alnum_*
+let const = "true" | "false"
+
+let keyword = "and" | "as" | "assert" | "begin" | "class" | "constraint" | "do"
+  | "done" | "downto" | "else" | "end" | "exception" | "external" | "for"
+  | "fun" | "function" | "functor" | "if" | "in" | "include" | "inherit"
+  | "initializer" | "lazy" | "let" | "match" | "method" | "module" | "mutable"
+  | "new" | "object" | "of" | "open" | "or" | "private" | "rec" | "sig"
+  | "struct" | "then" | "to" | "try" | "type" | "val" | "virtual" | "when"
+  | "while" | "with"
+
+rule scan ctx = parse
+  | "(*"       { range_start ctx; comment ctx lexbuf }
+  | number     { set_color ctx Constant }
+  | character  { set_color ctx Constant }
+  | string     { set_color ctx Constant }
+  | const      { set_color ctx Constant }
+  | keyword    { set_color ctx Keyword }
+  | typedef    { set_color ctx Type }
+  | identifier { (* skip *) }
+  | _          { scan ctx lexbuf }
+  | eof        { raise Eof }
+
+(* Body of a top-level comment; the colored range ends at its matching close. *)
+and comment ctx = parse
+  | "(*" { nested lexbuf; comment ctx lexbuf }
+  | "*)" { range_stop ctx Comment }
+  | _    { comment ctx lexbuf }
+  | eof  { raise Eof }
+
+(* OCaml comments nest: consume an inner comment through its own close. *)
+and nested = parse
+  | "(*" { nested lexbuf; nested lexbuf }
+  | "*)" { () }
+  | _    { nested lexbuf }
+  | eof  { raise Eof }
diff --git a/lib/lexers/lex_ruby.mll b/lib/lexers/lex_ruby.mll
new file mode 100644
index 0000000..48fea7c
--- /dev/null
+++ b/lib/lexers/lex_ruby.mll
@@ -0,0 +1,58 @@
+{ open Colormap }
+
+(* Syntax-highlighting lexer for Ruby sources. Each call to [scan] colors at
+   most one token; the end of input is signalled by raising [Eof]. *)
+
+let bin = ['0' '1']
+let oct = ['0'-'7']
+let dec = ['0'-'9']
+let hex = ['0'-'9' 'a'-'f' 'A'-'F']
+let exp = ['e''E'] ['+''-']? dec+
+
+let alpha_ = ['a'-'z' 'A'-'Z' '_']
+let alnum_ = (alpha_ | dec)
+
+(* Decimal digits, optionally grouped with underscores as in 1_000_000. *)
+let digits = dec+ ('_' dec+)*
+
+let ln_cmt = '#' [^ '\n']*
+let sq_string = "'" ([^'\\' '\''] | '\\' _)* "'"
+let dq_string = '"' ([^'\\' '"'] | '\\' _)* '"'
+let identifier = alpha_ alnum_*
+let typedef = ['A'-'Z'] alnum_*
+
+let number = (
+    digits ('.' digits)? exp?
+  | '0' ['b''B'] bin+
+  | '0' ['o''O'] oct+
+  | '0' ['x''X'] hex+
+)
+
+let const = "true" | "false" | "nil"
+
+let keyword = "alias" | "and" | "begin" | "BEGIN" | "break" | "case" | "class"
+  | "def" | "defined?" | "do" | "else" | "elsif" | "end" | "END" | "ensure"
+  | "for" | "if" | "in" | "module" | "next" | "not" | "or" | "redo" | "rescue"
+  | "retry" | "return" | "self" | "super" | "then" | "undef" | "unless"
+  | "until" | "when" | "while" | "yield"
+
+(* A block comment opens only when =begin starts a line, so the pattern is
+   anchored on the preceding newline. *)
+rule scan ctx = parse
+  | "\n=begin" { range_start ctx; comment ctx lexbuf }
+  | ln_cmt     { set_color ctx Comment }
+  | number     { set_color ctx Constant }
+  | sq_string  { set_color ctx Constant }
+  | dq_string  { set_color ctx Constant }
+  | const      { set_color ctx Constant }
+  | keyword    { set_color ctx Keyword }
+  | typedef    { set_color ctx Type }
+  | identifier { (* skip *) }
+  | _          { scan ctx lexbuf }
+  | eof        { raise Eof }
+
+(* Body of an =begin block comment; it runs up to an =end at a line start. *)
+and comment ctx = parse
+  | "\n=end" { range_stop ctx Comment }
+  | _        { comment ctx lexbuf }
+  | eof      { raise Eof }
diff --git a/lib/lexers/lex_text.mll b/lib/lexers/lex_text.mll
new file mode 100644
index 0000000..b58b158
--- /dev/null
+++ b/lib/lexers/lex_text.mll
@@ -0,0 +1,9 @@
+{ open Colormap }
+
+(* Fallback lexer for plain text: walks the input and assigns no colors. *)
+let ident = ['a'-'z' 'A'-'Z']+
+
+rule scan ctx = parse
+  | ident { scan ctx lexbuf }
+  | _     { scan ctx lexbuf }
+  | eof   { raise Eof }
-- 
2.49.0