From 96482492bb1a2ee3df930742592eda6e8e3b1c27 Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Sun, 19 Apr 2020 14:19:17 -0400 Subject: [PATCH] added prototype dynamically typed language compiler. Attempting to proveout closure conversion and one-pass compilation techniques --- .gitignore | 1 + dyn.rb | 424 ++++++++++++++++++++++++++++++++++++++++++++++++ dyn.src | 30 ++++ dyn2.rb | 467 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 922 insertions(+) create mode 100644 .gitignore create mode 100755 dyn.rb create mode 100644 dyn.src create mode 100755 dyn2.rb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cba7efc --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +a.out diff --git a/dyn.rb b/dyn.rb new file mode 100755 index 0000000..ccac77b --- /dev/null +++ b/dyn.rb @@ -0,0 +1,424 @@ +#!/usr/bin/env ruby + +require 'strscan' + +BuiltinSyms = {} + +class Symtable + def initialize + @builtins = BuiltinSyms + @scopes = [{}] + end + + def []=(k,v) + @scopes.last[k] = v + end + + def [](k) + (@scopes.map{|h| h[k] }.compact.last || BuiltinSyms[k]) + end + + def scope_start + @scopes.push({}) + end + + def scope_stop + @scopes.pop() + end +end + +class Lexer + Tok = Struct.new(:text, :file, :pos, :type) + SPACE = /([ \t\v\n\r]+|#.*\n)/ + IDENT = /[_a-zA-Z][_a-zA-Z0-9]*/ + BRACES = /[\(\)\[\]\{\}\.]/ + OPERATORS = /[*\/=+-:\$?]/ + NUMBER = /[0-9]+(\.[0-9]+)?/ + STRING = /"(\\"|[^"])*"/ + ID_TYPES = { + "nil" => :nil, + "true" => :bool, + "false" => :bool, + "if" => :if, + "else" => :else, + "end" => :end, + "let" => :let, + "fun" => :fun, + } + + attr_accessor :file + attr_accessor :data + + def initialize(path) + @file = path + @text = File.read(path) + @data = StringScanner.new(@text) + end + + def get_id_type(str) + ID_TYPES[str] || :ident + end + + def next + while @data.skip(SPACE) do + end + if not @data.eos? + type = :eof + if @data.scan(IDENT) + type = get_id_type(@data.matched) + elsif @data.scan(NUMBER) + type = :num + elsif @data.scan(STRING) + type = :string + elsif @data.scan(BRACES) + type = @data.matched + elsif @data.scan(OPERATORS) + type = @data.matched + end + Tok.new(@data.matched, @file, @data.pos, type) if type + else + Tok.new("EOF", @file, @data.pos, :eof) + end + end + + def linenum(tok = nil) + @text[0..(tok.pos || @data.pos)].count("\n") + 1 + end +end + +class Parser + LEVELS = { + none: 0, + assign: 1, + or: 2, + and: 3, + equality: 4, + compare: 5, + term: 6, + factor: 7, + unary: 8, + call: 9, + primary: 10, + eof: 11, + } + + LVLNAMES = LEVELS.keys + + RULES = { +# "." => { prefix: nil, infix: :ufcs_call, level: :call }, + "(" => { prefix: :grouping, infix: :func_call, level: :call }, + "+" => { prefix: :unary, infix: :binary, level: :term }, + "-" => { prefix: :unary, infix: :binary, level: :term }, + "*" => { prefix: nil, infix: :binary, level: :factor }, + "/" => { prefix: nil, infix: :binary, level: :factor }, + "=" => { prefix: nil, infix: :assign, level: :assign }, + :let => { prefix: :definition, infix: nil, level: :none }, + :fun => { prefix: :function, infix: nil, level: :none }, + :if => { prefix: :if_expr, infix: nil, level: :none }, + :ident => { prefix: :variable, infix: nil, level: :none }, + "[" => { prefix: :constant, infix: :subscript, level: :call }, + "{" => { prefix: :constant, infix: nil, level: :none }, + :nil => { prefix: :constant, infix: nil, level: :none }, + :bool => { prefix: :constant, infix: nil, level: :none }, + :num => { prefix: :constant, infix: nil, level: :none }, + :string => { prefix: :constant, infix: nil, level: :none }, + :char => { prefix: :constant, infix: nil, level: :none }, + :byte => { prefix: :constant, infix: nil, level: :none }, + } + + Def = Struct.new(:name, :value) + Func = Struct.new(:args, :body) + Call = Struct.new(:func, :args) + IfExpr = Struct.new(:cond, :br1, :br2) + + def initialize(path) + @lex = Lexer.new(path) + @prev = nil + @next = nil + @name = nil + @symbols = {} + end + + def toplevel() + defs = [] + exprs = [] + while !matches(:eof) + expr = expression() + if expr.is_a? Def + defs << expr.name + exprs << Call.new(:set!, [expr.name, expr.value]) + else + exprs << expr; + end + end + [defs, exprs] + end + + ####################################### + # Expression Parsing + ####################################### + def expression() + parseLevel(:assign) + end + + def getRule(tok) + RULES[tok.type] || {} + end + + def keepGoing(level) + rule = getRule(peek()) + (LEVELS[level] <= LEVELS[rule[:level]]) if rule.keys.length > 0 + end + + def parseLevel(level, take = true) + consume() if take + prefixRule = getRule(@prev)[:prefix] + error("error parsing expression") if not prefixRule + parseNextLevel(send(prefixRule), level) + end + + def parseNextLevel(left, level) + ast = left + while keepGoing(level) + consume() + infixRule = getRule(@prev)[:infix] + ast = send(infixRule, ast) + end + ast + end + + def nextLevel(tok) + level = getRule(@prev)[:level] + index = LVLNAMES.find_index(level) + error("unknown operator type '#{tok}'") if not index + LVLNAMES[index + 1] + end + + def assign(left) + Call.new(:set!, [left, expression()]) + end + + def variable() + @prev.text.to_sym + end + + def definition() + name = expect(:ident).text.to_sym + expr = nil + if (matches("(")) + expr = function() + else + expect("=") + expr = expression() + end + Def.new(name, expr) + end + + def function() + args = [] + exprs = [] + expect("(") + while (!matches(")")) + args << expect(:ident).text + expect(",") if not matches(")") + end + expect(")") + while (!matches(:end)) + exprs << expression() + end + expect(:end) + Func.new(args, exprs) + end + + def constant() + if (@prev.type == :num) + @prev.text.to_f + elsif (@prev.type == :string) + @prev.text + elsif (@prev.type == :nil) + nil + elsif (@prev.type == :bool) + (@prev.text == "true") + elsif (@prev.type == "[") + ary = [] + while !matches("]") + ary << expression() + expect(",") if !matches("]") + end + expect("]") + ary + elsif (@prev.type == "{") + hsh = {} + while !matches("}") + name = expect(:ident).text + expect("=") + hsh[name] = expression() + expect(",") if !matches("}") + end + expect("}") + hsh + else + error("not a valid constant") + end + end + + def grouping() + ast = expression() + expect(")") + ast + end + +# def ufcs_call(left) +# # TODO: UFCS call doesnt seem right or well thought out here... +# access = Call.new(:get!, [left, expect(:ident).text]) +# if accept("(") +# func_call(access, left) +# else +# access +# end +# end + + def func_call(ast, first_arg=nil) + call = Call.new(ast, []) + call.args << first_arg if not first_arg.nil? + while !matches(")") + call.args << expression() + expect(",") if !matches(")") + end + expect(")") + call + end + + def unary() + Call.new(@prev.type, [parseLevel(:unary)]) + end + + def binary(left) + Call.new(@prev.type, [left, parseLevel(nextLevel(@prev.type))]) + end + + def if_expr() + expr = IfExpr.new(nil, [], []) + expr.cond = expression() + expr.br1 = if_body() + if accept(:else) + if accept(:if) + expr.br2 = if_expr() + else + expr.br2 = if_body() + expect(:end) + end + end + expr + end + + def if_body() + block = [expression()] + while (!matches(:else) && !matches(:end)) + block << expression() + end + (matches(:else) || matches(:end)) + block + end + + ####################################### + # Parsing Primitives + ####################################### + def error(str) + puts "#{@lex.file}:#{@lex.linenum(@next || @prev)}: #{str}" + exit 1 + end + + def peek() + @next = @lex.next if @next.nil? + @next + end + + def matches(type) + (peek().type == type) + end + + def accept(type) + if (matches(type)) + @prev = @next + @next = nil + true + else + false + end + end + + def expect(type) + tok = peek() + if not accept(type) + error("expected '#{type}', received '#{tok.type}'") + end + tok + end + + def consume() + expect(peek().type) + end + + def eof? + (peek().type == :eof) + end +end + + +module Emit + def self.expr(e) + if (e.is_a? Parser::Call) + Emit.call(e) + elsif (e.is_a? Symbol) + "#{e}" + elsif (e.is_a? String) + "MKSTRING(#{e})" + elsif (e.is_a? Float) + "MKFLOAT(#{e})" + else + puts e + end + end + + def self.string(e) + "MKSTR(#{e})" + end + + def self.call(e) + if e.func == :set! then + "#{e.args[0]} = #{Emit.expr(e.args[1])};" + elsif %w[+ - * /].include?(e.func) + "#{Emit.expr(e.args[0])} #{e.func} #{Emit.expr(e.args[1])}" + else + Emit.calln(e) + end + end + + def self.calln(e) + len = e.args.length + args = e.args.map{|a| Emit.expr(a) }.join(", ") + if len <= 8 + "CALL#{len}(#{e.func}, #{args});" + else + "CALLN(#{e.func}, #{len}, #{args});" + end + end +end + + +parse = Parser.new("dyn.src") +defs, exprs = parse.toplevel + +puts "Value " + (defs.join(", ")) + ";" +puts "void toplevel(void) {" +exprs.each do |e| + puts Emit.expr(e) +# puts e +# if (e.is_a? Parser::Call) +# Emit.call(e) +# else +# puts e +# end +end +puts "}" diff --git a/dyn.src b/dyn.src new file mode 100644 index 0000000..d873699 --- /dev/null +++ b/dyn.src @@ -0,0 +1,30 @@ +#[1,2,3] + +#{ "foo": 123 } +#123 = foo +#foo = 1 +##{} +#true +#false +#123 +#123.0 +#"foo" +#nil +#(456 + 1) +#(456 - 1) +#(456 * 1) +#(456 - 1) +# +#let foo = 123 +#foo = 42 +# +#if 1 +# 2 +#else +# 3 +#end +# +#println("foo!") +#print(1,2,3,4,5,6,7,8,9) +#(-1) +#(+1) diff --git a/dyn2.rb b/dyn2.rb new file mode 100755 index 0000000..3b70fdc --- /dev/null +++ b/dyn2.rb @@ -0,0 +1,467 @@ +#!/usr/bin/env ruby + +require 'strscan' + +BuiltinSyms = {} + +class Symtable + def initialize + @builtins = BuiltinSyms + @scopes = [{}] + end + + def []=(k,v) + @scopes.last[k] = v + end + + def [](k) + (@scopes.map{|h| h[k] }.compact.last || BuiltinSyms[k]) + end + + def scope_start + @scopes.push({}) + end + + def scope_stop + @scopes.pop() + end +end + +class Lexer + Tok = Struct.new(:text, :file, :pos, :type) + SPACE = /([ \t\v\n\r]+|#.*\n)/ + IDENT = /[_a-zA-Z][_a-zA-Z0-9]*/ + BRACES = /[\(\)\[\]\{\}\.]/ + OPERATORS = /[*\/=+-:\$?]/ + NUMBER = /[0-9]+(\.[0-9]+)?/ + STRING = /"(\\"|[^"])*"/ + ID_TYPES = { + "nil" => :nil, + "true" => :bool, + "false" => :bool, + "if" => :if, + "else" => :else, + "end" => :end, + "let" => :let, + "fun" => :fun, + } + + attr_accessor :file + attr_accessor :data + + def initialize(path) + @file = path + @text = File.read(path) + @data = StringScanner.new(@text) + end + + def get_id_type(str) + ID_TYPES[str] || :ident + end + + def next + while @data.skip(SPACE) do + end + if not @data.eos? + type = :eof + if @data.scan(IDENT) + type = get_id_type(@data.matched) + elsif @data.scan(NUMBER) + type = :num + elsif @data.scan(STRING) + type = :string + elsif @data.scan(BRACES) + type = @data.matched + elsif @data.scan(OPERATORS) + type = @data.matched + end + Tok.new(@data.matched, @file, @data.pos, type) if type + else + Tok.new("EOF", @file, @data.pos, :eof) + end + end + + def linenum(tok = nil) + @text[0..(tok.pos || @data.pos)].count("\n") + 1 + end +end + +class Parser + LEVELS = { + none: 0, + assign: 1, + or: 2, + and: 3, + equality: 4, + compare: 5, + term: 6, + factor: 7, + unary: 8, + call: 9, + primary: 10, + eof: 11, + } + + LVLNAMES = LEVELS.keys + + RULES = { + "(" => { prefix: :grouping, infix: :func_call, level: :call }, + "+" => { prefix: :unary, infix: :binary, level: :term }, + "-" => { prefix: :unary, infix: :binary, level: :term }, + "*" => { prefix: nil, infix: :binary, level: :factor }, + "/" => { prefix: nil, infix: :binary, level: :factor }, + "=" => { prefix: nil, infix: :assign, level: :assign }, + :let => { prefix: :definition, infix: nil, level: :none }, + :fun => { prefix: :function, infix: nil, level: :none }, + :if => { prefix: :if_expr, infix: nil, level: :none }, + :ident => { prefix: :variable, infix: nil, level: :none }, + "[" => { prefix: :constant, infix: :subscript, level: :call }, + "{" => { prefix: :constant, infix: nil, level: :none }, + :nil => { prefix: :constant, infix: nil, level: :none }, + :bool => { prefix: :constant, infix: nil, level: :none }, + :num => { prefix: :constant, infix: nil, level: :none }, + :string => { prefix: :constant, infix: nil, level: :none }, + :char => { prefix: :constant, infix: nil, level: :none }, + :byte => { prefix: :constant, infix: nil, level: :none }, + } + + def initialize(path) + @lex = Lexer.new(path) + @syms = Symtable.new() + @prev = nil + @next = nil + @protos = StringIO.new + @funcs = [] + @func_stack = [] + @func_id = 0 + end + + def toplevel() + func_start() + emit_line "Value toplevel(void) {" + expr = emit_expr "NULL" + while !matches(:eof) + expr = expression() + end + emit_line " return #{expr};" + emit_line "}" + func_stop() + end + + ####################################### + # Expression Parsing + ####################################### + def expression() + parseLevel(:assign) + end + + def getRule(tok) + RULES[tok.type] || {} + end + + def keepGoing(level) + rule = getRule(peek()) + (LEVELS[level] <= LEVELS[rule[:level]]) if rule.keys.length > 0 + end + + def parseLevel(level, take = true) + consume() if take + prefixRule = getRule(@prev)[:prefix] + error("error parsing expression") if not prefixRule + parseNextLevel(send(prefixRule), level) + end + + def parseNextLevel(left, level) + ast = left + while keepGoing(level) + consume() + infixRule = getRule(@prev)[:infix] + ast = send(infixRule, ast) + end + ast + end + + def nextLevel(tok) + level = getRule(@prev)[:level] + index = LVLNAMES.find_index(level) + error("unknown operator type '#{tok}'") if not index + LVLNAMES[index + 1] + end + + def assign(left) + if (left.to_s.start_with? "__tmp") + error("invalid expression on left-hand side of assignment") + end + emit_line " #{left} = #{expression()};" + left.to_s + end + + def variable() + @prev.text.to_sym + end + + def definition() + name = expect(:ident).text.to_sym + expect("=") + emit_line " Value #{name} = #{expression()};" + name + end + + def function() # TODO: fix this! + func_start() + name = func_args() + result = nil + while (!matches(:end)) + result = expression() + end + emit_line " return #{result};" + emit_line "}" + expect(:end) + func_stop() + emit_expr "MKFUNC(#{name})" + end + + def func_args() + @func_id += 1 + name = "__anon_func#{@func_id}" + @protos.print "static Value #{name}(" + emit "static Value #{name}(" + expect("(") + while (!matches(")")) + arg = expect(:ident).text + @syms[arg] = :arg + @protos.print "Value #{arg}" + emit "Value #{arg}" + if not matches(")") + @protos.print ", " + emit ", " + expect(",") + end + end + @protos.puts(");") + emit_line ") {" + expect(")") + name + end + + def constant() + if (@prev.type == :num) + emit_expr("MKNUM(#{@prev.text.to_f})") + elsif (@prev.type == :string) + emit_expr("MKSTR(#{@prev.text})") + elsif (@prev.type == :nil) + emit_expr("NULL") + elsif (@prev.type == :bool) + emit_expr("MKBOOL(#{@prev.text == "true"})") + elsif (@prev.type == "[") + array = emit_expr "MKARRAY()" + while !matches("]") + elem = expression() + emit_line " ARRAY_APPEND(#{array}, #{elem})" + expect(",") if !matches("]") + end + expect("]") + array + elsif (@prev.type == "{") + hash = emit_expr "MKHASH()" + while !matches("}") + name = expression() + expect(":") + expr = expression() + emit_line " HASH_SET(#{hash}, #{name}, #{expr})" + expect(",") if !matches("}") + end + expect("}") + hash + else + error("not a valid constant") + end + end + + def grouping() + ast = expression() + expect(")") + ast + end + + def func_call(func, first_arg=nil) + args = [] + while !matches(")") + args << expression() + expect(",") if !matches(")") + end + expect(")") + if args.length > 8 + emit_expr("CALLN(#{func}, #{args.length}, #{args.join(", ")})") + else + emit_expr("CALL#{args.length}(#{func}, #{args.join(", ")})") + end + end + + def unary() + case @prev.type + when "+" + emit_expr("OP_POS(#{parseLevel(:unary)})") + when "-" + emit_expr("OP_NEG(#{parseLevel(:unary)})") + else + error("invalid binary operator #{op.type.inspect}") + end + end + + def binary(left) + op = @prev + right = parseLevel(nextLevel(op.type)) + case op.type + when "+" + emit_expr("OP_ADD(#{left}, #{right})") + when "-" + emit_expr("OP_SUB(#{left}, #{right})") + when "*" + emit_expr("OP_MUL(#{left}, #{right})") + when "/" + emit_expr("OP_DIV(#{left}, #{right})") + else + error("invalid binary operator #{op.type.inspect}") + end + end + + def if_expr() + cond = expression() + result = get_tempvar() + emit_line " Value #{result} = NULL;" + emit_line " if (ISTRUTHY(#{cond})) {" + if_body(result) + if accept(:else) + emit_line " } else {" + if_body(result) + end + expect(:end) + emit_line " }" + result + end + + def if_body(result) + tres = expression() + while (!matches(:else) && !matches(:end)) + tres = expression() + end + (matches(:else) || matches(:end)) + emit_line " #{result} = #{tres};" + end + + ####################################### + # Code Generation Primitives + ####################################### + def get_tempvar() + @tempvar = 0 if @tempvar.nil? + @tempvar += 1 + "__tmp#{@tempvar}" + end + + def emit_line(str) + @func_stack.last.puts str + end + + def emit(str) + @func_stack.last.print str + end + + def emit_expr(expr) + v = get_tempvar() + emit_line " Value #{v} = #{expr};" + v + end + + def dump(file=$stdout) + file.puts @protos.string + @funcs.each {|f| file.puts f.string } + end + + def func_start() + io = StringIO.new + @syms.scope_start + @funcs.push(io) + @func_stack.push(io) + end + + def func_stop + @syms.scope_stop() + @func_stack.pop() + end + + ####################################### + # Parsing Primitives + ####################################### + def error(str) + puts "#{@lex.file}:#{@lex.linenum(@next || @prev)}: #{str}" + exit 1 + end + + def peek() + @next = @lex.next if @next.nil? + @next + end + + def matches(type) + (peek().type == type) + end + + def accept(type) + if (matches(type)) + @prev = @next + @next = nil + true + else + false + end + end + + def expect(type) + tok = peek() + if not accept(type) + error("expected '#{type}', received '#{tok.type}'") + end + tok + end + + def consume() + expect(peek().type) + end + + def eof? + (peek().type == :eof) + end +end + +parse = Parser.new("dyn.src") +parse.toplevel +parse.dump + +####################################### +# Runtime Support +####################################### +##include +##include +# +##define MKBOOL(a) NULL +##define MKNUM(a) NULL +##define MKSTR(a) NULL +##define CALLN(a,b,...) 0 +##define CALL8() 0 +##define CALL1(a,b) 0 +##define ISTRUTHY(a) 0 +##define OP_ADD(a,b) 0 +##define OP_SUB(a,b) 0 +##define OP_MUL(a,b) 0 +##define OP_DIV(a,b) 0 +# +#typedef void* Value; + +# accumulate function prototypes +# accumulate expressions in current buffer +# push new buffer for each function, pop at the end of the function (track scope) +# emit global vars +# emit function prototypes +# emit function bodies +# emit toplevel \ No newline at end of file -- 2.52.0