]> git.mdlowis.com Git - archive/tide-ocaml.git/commitdiff
first attempt at extending lexers
author Michael D. Lowis <mike@mdlowis.com>
Tue, 12 Dec 2017 01:55:50 +0000 (20:55 -0500)
committer Michael D. Lowis <mike@mdlowis.com>
Tue, 12 Dec 2017 01:55:50 +0000 (20:55 -0500)
lib/buf.ml
lib/lexers/lex_ocaml.mll [new file with mode: 0644]
lib/lexers/lex_ruby.mll [new file with mode: 0644]
lib/lexers/lex_text.mll [new file with mode: 0644]

index 63f864254e0a6591bada2ee7181ee403c916554e..8803fc285ba5a568b75f5ab7b7c33af740c77c8c 100644 (file)
@@ -24,12 +24,12 @@ let filetypes = [
     exts   = [".c"; ".h"; ".cpp"; ".hpp"; ".cc"; ".c++"; ".cxx"]
   };
   {
-    syntax = Lex_cpp.scan;
+    syntax = Lex_ruby.scan;
     names  = ["Rakefile"; "rakefile"; "gpkgfile"];
     exts   = [".rb"]
   };
   {
-    syntax = Lex_cpp.scan;
+    syntax = Lex_ocaml.scan;
     names  = [];
     exts   = [".ml"; ".mll"; "mli"]
   }
@@ -43,10 +43,10 @@ let pick_syntax path =
     (List.exists ((=) ext) ftype.exts)
   in match (List.find_opt match_ftype filetypes) with
     | Some ft -> ft.syntax
-    | None -> Lex_cpp.scan
+    | None -> Lex_text.scan
 
 let empty =
-  { lexfn = Lex_cpp.scan;
+  { lexfn = Lex_text.scan;
     path = "";
     rope = Rope.empty }
 
diff --git a/lib/lexers/lex_ocaml.mll b/lib/lexers/lex_ocaml.mll
new file mode 100644 (file)
index 0000000..6e23b6c
--- /dev/null
@@ -0,0 +1,40 @@
+{ open Colormap }
+
+(* Digit classes, one per radix accepted in integer literals. *)
+let bin = ['0' '1']
+let oct = ['0'-'7']
+let dec = ['0'-'9']
+let hex = ['0'-'9' 'a'-'f' 'A'-'F']
+
+let alpha_ = ['a'-'z' 'A'-'Z' '_']
+let alnum_ = (alpha_ | dec)
+(* OCaml names may contain primes after their first character. Without this
+   class a primed name followed by a space and a character literal was lexed
+   as a bogus character literal starting at the prime. *)
+let ident_ = (alnum_ | '\'')
+
+(* Exponent part of a decimal floating point literal. *)
+let exp = ['e' 'E'] ['+' '-']? dec (dec | '_')*
+(* Optional suffix selecting int32, int64 or nativeint. *)
+let isuffix = ['l' 'L' 'n']
+
+let identifier = alpha_ ident_*
+(* Integers in radix 10, 2, 8 and 16 with underscore separators, followed by
+   decimal floats. Longest match picks the float form whenever a dot or an
+   exponent follows the integer part. *)
+let number = (
+    dec (dec | '_')* isuffix?
+  | '0' ['b' 'B'] bin (bin | '_')* isuffix?
+  | '0' ['o' 'O'] oct (oct | '_')* isuffix?
+  | '0' ['x' 'X'] hex (hex | '_')* isuffix?
+  | dec (dec | '_')* '.' (dec | '_')* exp?
+  | dec (dec | '_')* exp
+)
+(* One plain character or one escape: a single escaped character, three
+   decimal digits, x plus two hex digits, or o plus three octal digits. *)
+let character = "'" (
+    [^'\\' '\'']
+  | '\\' _
+  | '\\' dec dec dec
+  | '\\' 'x' hex hex
+  | '\\' 'o' oct oct oct
+) "'"
+let string = '"' ([^'\\' '"'] | '\\' _)* '"'
+(* Capitalised names: constructors and module names. *)
+let typedef = ['A'-'Z'] ident_*
+let const = "true" | "false"
+
+(* Reserved words of the language in alphabetical order. The word operators
+   asr, land, lor, lsl, lsr, lxor and mod as well as nonrec are reserved too
+   and were missing. true and false are deliberately absent: the const
+   pattern covers them. *)
+let keyword = "and" | "as" | "assert" | "asr" | "begin" | "class" | "constraint"
+    | "do" | "done" | "downto" | "else" | "end" | "exception" | "external"
+    | "for" | "fun" | "function" | "functor" | "if" | "in" | "include"
+    | "inherit" | "initializer" | "land" | "lazy" | "let" | "lor" | "lsl"
+    | "lsr" | "lxor" | "match" | "method" | "mod" | "module" | "mutable"
+    | "new" | "nonrec" | "object" | "of" | "open" | "or" | "private" | "rec"
+    | "sig" | "struct" | "then" | "to" | "try" | "type" | "val" | "virtual"
+    | "when" | "while" | "with"
+
+(* Entry point of the OCaml lexer. Each call handles the next token that
+   matters: a comment opener hands over to the comment rule, literals and the
+   boolean constants are passed to set_color as Constant, reserved words as
+   Keyword and capitalised names as Type. A plain identifier is consumed
+   without any colouring, any other single character is skipped and scanning
+   continues, and the end of input raises Eof. Case order matters: on equal
+   match length the earlier case wins, which is what keeps reserved words and
+   constants from being treated as identifiers. *)
+rule scan ctx = parse
+  | "(*"
+      { range_start ctx;
+        comment ctx lexbuf }
+  | (number | character | string | const)
+      { set_color ctx Constant }
+  | keyword
+      { set_color ctx Keyword }
+  | typedef
+      { set_color ctx Type }
+  | identifier
+      { () }
+  | _
+      { scan ctx lexbuf }
+  | eof
+      { raise Eof }
+
+(* Consumes the body of a comment whose opening delimiter was matched by scan.
+   OCaml comments nest, so an inner opening delimiter is handed to the nested
+   rule, which swallows text up to the matching close; only then does the
+   search for the outer close resume. The outer close ends the range started
+   by scan and marks it as Comment. Reaching the end of input inside a
+   comment raises Eof without closing the range. *)
+and comment ctx = parse
+  | "(*" { nested ctx lexbuf; comment ctx lexbuf }
+  | "*)" { range_stop ctx Comment }
+  | _    { comment ctx lexbuf }
+  | eof  { raise Eof }
+
+(* Skips one inner comment, including any comments nested inside it, and
+   returns unit once its closing delimiter has been consumed. It never
+   touches the colour range; that stays with the comment rule. *)
+and nested ctx = parse
+  | "(*" { nested ctx lexbuf; nested ctx lexbuf }
+  | "*)" { () }
+  | _    { nested ctx lexbuf }
+  | eof  { raise Eof }
diff --git a/lib/lexers/lex_ruby.mll b/lib/lexers/lex_ruby.mll
new file mode 100644 (file)
index 0000000..48fea7c
--- /dev/null
@@ -0,0 +1,62 @@
+{ open Colormap }
+
+(* Lexer for Ruby sources. The previous contents were the C rules verbatim,
+   which coloured hash comments as preprocessor directives and knew none of
+   the Ruby reserved words. *)
+
+(* Digit classes, one per radix accepted in integer literals. *)
+let bin = ['0' '1']
+let oct = ['0'-'7']
+let dec = ['0'-'9']
+let hex = ['0'-'9' 'a'-'f' 'A'-'F']
+let exp = ['e' 'E'] ['+' '-']? dec+
+
+let alpha_ = ['a'-'z' 'A'-'Z' '_']
+let alnum_ = (alpha_ | dec)
+
+(* A hash sign starts a comment that runs to the end of the line. *)
+let ln_cmt = '#' [^ '\n']*
+(* Single and double quoted strings; both may contain escaped characters. *)
+let sq_string = "'" ([^'\\' '\''] | '\\' _)* "'"
+let string = '"' ([^'\\' '"'] | '\\' _)* '"'
+(* Method names may end in a question mark. Taking it as part of the name
+   keeps predicate calls such as nil? from being coloured as the constant. *)
+let identifier = alpha_ alnum_* '?'?
+(* Instance, class and global variables. Matching the sigil together with
+   the name keeps a variable named after a reserved word uncoloured. *)
+let variable = ("@@" | '@' | '$') alpha_ alnum_*
+
+(* Integers with underscore separators in radix 10, 16, 2 and 8, and floats.
+   A float needs digits on both sides of the dot, so a method call on an
+   integer or a range such as 1..10 still lexes the integer on its own. *)
+let digits = dec (dec | '_')*
+let number = (
+    digits
+  | '0' ['x' 'X'] hex (hex | '_')*
+  | '0' ['b' 'B'] bin (bin | '_')*
+  | '0' ['o' 'O'] oct (oct | '_')*
+  | digits '.' digits exp?
+  | digits exp
+)
+
+let const = "true" | "false" | "nil"
+
+let keyword = "alias" | "and" | "begin" | "BEGIN" | "break" | "case" | "class"
+    | "def" | "defined?" | "do" | "else" | "elsif" | "END" | "end" | "ensure"
+    | "for" | "if" | "in" | "module" | "next" | "not" | "or" | "redo"
+    | "rescue" | "retry" | "return" | "self" | "super" | "then" | "undef"
+    | "unless" | "until" | "when" | "while" | "yield"
+    | "__FILE__" | "__LINE__" | "__ENCODING__"
+
+(* Capitalised names are constants, which includes class and module names. *)
+let typedef = ['A'-'Z'] alnum_*
+
+(* Entry point. Each call handles the next token that matters and returns;
+   uninteresting characters are skipped and the end of input raises Eof.
+   Case order matters: on equal match length the earlier case wins, so
+   reserved words and constants take priority over typedef and identifier.
+   NOTE(review): Ruby only honours the block comment markers at the start of
+   a line; this rule does not check the column. *)
+rule scan ctx = parse
+  | "=begin"   { range_start ctx; comment ctx lexbuf }
+  | ln_cmt     { set_color ctx Comment }
+  | number     { set_color ctx Constant }
+  | sq_string  { set_color ctx Constant }
+  | string     { set_color ctx Constant }
+  | const      { set_color ctx Constant }
+  | keyword    { set_color ctx Keyword }
+  | typedef    { set_color ctx Type }
+  | variable   { (* skip *) }
+  | identifier { (* skip *) }
+  | _          { scan ctx lexbuf }
+  | eof        { raise Eof }
+
+(* Consumes a block comment up to its end marker and closes the range that
+   scan opened, marking it as Comment. *)
+and comment ctx = parse
+  | "=end" { range_stop ctx Comment }
+  | _      { comment ctx lexbuf }
+  | eof    { raise Eof }
diff --git a/lib/lexers/lex_text.mll b/lib/lexers/lex_text.mll
new file mode 100644 (file)
index 0000000..b58b158
--- /dev/null
@@ -0,0 +1,8 @@
+{ open Colormap }
+
+(* A run of ASCII letters. *)
+let ident = ['A'-'Z' 'a'-'z']+
+
+(* Fallback scanner for files without a dedicated lexer. It never calls a
+   colouring function: words and single characters alike are consumed and
+   scanning simply continues until the end of input raises Eof. *)
+rule scan ctx = parse
+  | (ident | _) { scan ctx lexbuf }
+  | eof         { raise Eof }