From 33e8f47dcbca43c8020df0db0a8932d78430cf4c Mon Sep 17 00:00:00 2001 From: Mike Lowis Date: Thu, 9 Nov 2023 16:27:37 -0500 Subject: [PATCH] added compiler script --- aas.rb | 64 ++-- cerise.m | 8 + cerise.rb | 913 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ example.s | 3 + 4 files changed, 952 insertions(+), 36 deletions(-) create mode 100644 cerise.m create mode 100755 cerise.rb diff --git a/aas.rb b/aas.rb index 43f473c..6664e67 100755 --- a/aas.rb +++ b/aas.rb @@ -1,5 +1,19 @@ #!/bin/env ruby +# TODO: +# remove tempfile usage +# implement peephole optimization +# global variable addressing +# global variable loading +# global variable saving +# local variable addressing +# local variable loading +# local variable saving +# parameter variable addressing +# parameter variable loading +# parameter variable saving +# implement if statements + require "tempfile" def emit(text) @@ -7,7 +21,7 @@ def emit(text) end START_CODE = <<-eos -.text +.section .text._start, "ax" .global _start _start: movl (%rsp), %edi @@ -256,41 +270,11 @@ module Targets end end -class Variable - def initialize(name, &block) - $asm_out.write ".data\n" - $asm_out.write "#{name}:\n" - instance_eval(&block) - $asm_out.write "\n" - end - - def int(value) - emit ".long #{value.to_i}" - end - - def float(value) - emit ".long #{value.to_i}" - emit ".long #{value.to_i}" - end - - def bool(value) - emit ".byte #{value ? 1 : 0}" - end - - def byte(value) - emit ".byte 0x#{"%02x" % value.to_i}" - end - - def string(value) - emit ".ascii #{value.to_s.inspect}" - end -end - class Function def initialize(name, nargs = 0, &block) @nargs = nargs @returned = false - $asm_out.write ".text\n" + $asm_out.write ".section .text.#{name}, \"ax\"\n" $asm_out.write "#{name}:\n" emit "pushq %rbp" emit "movq %rsp, %rbp" @@ -339,8 +323,12 @@ class Function end end -def var(name, &block) - Variable.new(name, &block) +def var(name, size, p2align=0) + $asm_out.write ".section .bss.#{name}, \"aw\"\n" + $asm_out.write ".p2align #{p2align}\n" + $asm_out.write "#{name}:\n" + emit ".zero #{size}" + $asm_out.write "\n" end def func(name, nargs=0, &block) @@ -352,9 +340,13 @@ def target(name) $target = Targets.const_get(name.to_s) end +def global(name) + puts ".global #{name}" +end + def gen_syscall_handler(n) regs = %w[%rdi %rsi %rdx %r10 %r8 %r9] - $asm_out.write ".text\n" + $asm_out.write ".section .text.syscall#{n}, \"ax\"\n" $asm_out.write ".global syscall#{n}\n" $asm_out.write "syscall#{n}:\n" emit "movq 8(%rsp), %rax" @@ -377,7 +369,7 @@ ARGV.each do |f| load f end $syscalls.each_with_index do |used, n| - gen_syscall_handler(n) if used + gen_syscall_handler(n) end $asm_out.write START_CODE if $main_obj diff --git a/cerise.m b/cerise.m new file mode 100644 index 0000000..37db56c --- /dev/null +++ b/cerise.m @@ -0,0 +1,8 @@ +#export( +# main +#) + +main() +{ + return 0 +} diff --git a/cerise.rb b/cerise.rb new file mode 100755 index 0000000..7931113 --- /dev/null +++ b/cerise.rb @@ -0,0 +1,913 @@ +#!/usr/bin/env ruby + +require 'stringio' +require 'strscan' + +class Lexer + Tok = Struct.new(:text, :file, :pos, :type) + + attr_accessor :file + attr_accessor :data + + def initialize(path = nil) + @file = (path || "") + @text = (path ? File.read(path) : "") + @data = StringScanner.new(@text) + end + + def parse_string(str) + @file = "" + @text = str + @data = StringScanner.new(@text) + end + + def linenum(pos = nil) + @text[0..(pos || @data.pos)].count("\n") + 1 + end + + SPACE = /([ \t\v\n\r]+|#.*\n)/ + IDENT = /[_a-zA-Z][_a-zA-Z0-9]*!?/ + BRACES = /[\(\)\[\]\{\}\.]/ + OPERATORS = /[:,<>*\/=+\-\$?!]+/ + INTEGER = /[0-9]+/ + FLOATING = /[0-9]+\.[0-9]+/ + STRING = /"(\\"|[^"])*"/ + ID_TYPES = { + "true" => :bool, + "false" => :bool, + "if" => :if, + "then" => :then, + "else" => :else, + "end" => :end, + "let" => :let, + "func" => :func, + "in" => :in, + "set!" => :set, + "is" => :is, + "return" => :return, + } + + def next + while @data.skip(SPACE) do + end + if not @data.eos? + type = :eof + if @data.scan(IDENT) + type = (ID_TYPES[@data.matched] || :ident) + elsif @data.scan(FLOATING) + type = :float + elsif @data.scan(INTEGER) + type = :int + elsif @data.scan(STRING) + type = :string + elsif @data.scan(BRACES) + type = @data.matched + elsif @data.scan(OPERATORS) + type = @data.matched + end + Tok.new(@data.matched, @file, @data.pos, type) if type + else + Tok.new("EOF", @file, @data.pos, :eof) + end + end +end + +class SymTable < Hash + attr_accessor :parent + + def initialize(parent = nil) + @global_scope = true if not parent.nil? + @parent = (parent || {}) + end + + def clone() + SymTable.new(self) + end + + def [](key) + (super(key) || @parent[key]) + end + + def defined?(key) + not self[key].nil? + end + + def typed?(key) + not (self[key] || {})[:type].nil? + end + + def add_sym(name, loc, kind, type = nil) + self[name] = { + loc: loc, + kind: kind, + type: type + } + end + + def set_type(name, type) + (self[name] || {})[:type] = type + end + + def global?(name) + if self[name] + @global_scope + elsif @parent.class == SymTable + @parent.global?(name) + else + nil + end + end + + def depth + count = 0 + env = self + while env.class != Hash + count += 1 + env = env.parent + end + count + end + +# def local(key) +# method(:[]).super_method.call(key) +# end +# +# def defined_ever?(key) +# (not (self[key] || {})[:type].nil?) +# end +# +# def defined_locally?(key) +# (not (local(key) || {})[:type].nil?) +# end +# +# def block_local?(key) +# (not local(key).nil?) +# end +# +# def global?(key) +# if @parent.class == Hash +# block_local?(key) +# elsif block_local? key +# false +# else +# @parent.global? key +# end +# end +# +# def local?(key) +# if @parent.class == Hash +# false +# elsif @type == :func +# block_local? key +# else +# @parent.local? key +# end +# end +# +# def free?(key) +# (defined_ever? key) and (not local? key) and (not global? key) +# end +# +# def merge!(env) +# env.each {|k,v| self[k] = v } +# end +# +# def annotate(key, type) +# self[key] ||= {} +# self[key][:ann] = type +# end +# +# def annotation(key) +# (method(:[]).super_method.call(key) || {})[:ann] +# end +# +# def stack() +# parent = (@parent.is_a?(SymTable) ? @parent.stack : []) +# ([self] + parent).flatten +# end +end + +module IR + Func = Struct.new(:loc, :type, :args, :body) + Return = Struct.new(:loc, :type, :value) + Const = Struct.new(:loc, :type, :value) + Var = Struct.new(:loc, :type, :name) + + Def = Struct.new(:loc, :type, :name, :value) + +# EnvRef = Struct.new(:loc, :type, :index) +# Let = Struct.new(:loc, :type, :var, :expr, :body) +# If = Struct.new(:loc, :type, :cond, :then, :else) +# Set = Struct.new(:loc, :type, :var, :expr) +# Apply = Struct.new(:loc, :type, :func, :args) +end + + +class TypeChecker + UnaryOps = { + "+" => { + int: [:int, :int], + float: [:float, :float], + }, + "-" => { + int: [:int, :int], + float: [:float, :float], + }, + "!" => { + bool: [:bool, :bool] + }, + } + + BinaryOps = { + "+" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "-" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "*" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "/" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "%" => { + int: [:int, :int, :int], +# Float modulo normally implemented as library function +# float: [:float, :float, :float], + }, + "<" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + }, + ">" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + }, + "<=" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + }, + ">=" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + }, + "==" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + string: [:string, :string, :bool], + }, + "!=" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + string: [:string, :string, :bool], + }, + "&&" => { bool: [:bool, :bool, :bool] }, + "||" => { bool: [:bool, :bool, :bool] }, + "<<" => { int: [:int, :int, :int] }, + ">>" => { int: [:int, :int, :int] }, + "&" => { int: [:int, :int, :int] }, + "^" => { int: [:int, :int, :int] }, + "|" => { int: [:int, :int, :int] }, + } + + def initialize(parser) + @parser = parser + end + + def error(loc, msg) + @parser.error(msg, loc) + end + + def check(env, expr, type) + if (expr.is_a? IR::If) + check_ifexpr(env, expr, type) + elsif (expr.is_a? IR::Func) + check_func(env, expr, type) + elsif (expr.is_a? IR::Var) + check_var(env, expr, type) + elsif (expr.is_a? IR::Let) + check_let(env, expr, type) +# elsif (expr.is_a? IR::Apply) +# check_apply(env, expr, type) + else + etype = infer(env, expr) + if type != etype + error(expr.loc, "expected #{type}, received #{etype}") + end + end + expr.type = type + end + + def infer(env, expr) + if expr.is_a? IR::Const + infer_const(env, expr) + elsif expr.is_a? IR::Var + infer_var(env, expr) + elsif expr.is_a? IR::Let + infer_let(env, expr) + elsif expr.is_a? IR::If + infer_ifexpr(env, expr) + elsif expr.is_a? IR::Set + infer_set(env, expr) + elsif expr.is_a? IR::Func + infer_func(env, expr) + elsif expr.is_a? IR::Apply + infer_apply(env, expr) + else + error(expr.loc, "unable to determine type of expression") + end + end + + private + + def make_typevar() + @typevar ||= 0 + var = "abcdefghijklmnopqrstuvwxyz"[@typevar] + @typevar += 1 + var + end + + def var?(expr) + expr.class == IR::Var + end + + def untyped_global_func?(env, func, type) + type.nil? and + var?(func) and + env.global?(func.name) and + env[func.name][:value] + end + + def check_apply(env, expr, type) + # Handle global functions that haven't been typed yet but are + # being called. We pause to infer their type. + if untyped_global_func?(env, expr.func, type) + value = env[expr.func.name][:value] + env[expr.func.name][:value] = nil + infer(@parser.syms, value) + type = infer(env, expr.func) + end + + error(expr.loc, "object being applied is not a function (has type: #{type.to_s})") if not type.is_a? Array + error(expr.loc, "wrong number of arguments to function call") if (type.length - 1) != expr.args.length + type[0..-2].each_with_index do |t,i| + check(env, expr.args[i], t) + end + expr.type = type.last + end + + def check_ifexpr(env, expr, type) + check(env, expr.cond, :bool) + check(env, expr.then, type) + check(env, expr.else, type) + end + + def check_var(env, expr, type) + etype = infer(env, expr) + if (etype.class == String) + expr.type = type + env.set_type(expr.name, type) + elsif expr.type != type + error(expr.loc, "expected #{type}, received #{etype}") + end + type + end + + def verify(cond, loc, msg) + error(loc, msg) if not cond + end + + def check_func(env, expr, type) + env = env.clone + verify((type.length-1) == expr.args.length, expr.loc, + "incorrect number of arguments (#{expr.args.length}, expected #{(type.length-1)}") + expr.args.each_with_index do |a,i| + env.add_sym(a.name, a.loc, :arg, type[i]) + end + check(env, expr.expr, type.last) + end + + def check_let(env, let, type) + env = env.clone + env.add_sym(let.var.name, let.var.loc, :var, let.var.type) + check(env, let.expr, type) + end + + def infer_const(env, expr) + expr.type + end + + def infer_var(env, expr) + if not env.defined?(expr.name) + error(expr.loc, "symbol '#{expr.name}' not defined") + end + expr.type = env[expr.name][:type] + end + + def infer_let(env, let) + if let.body.nil? + infer_decl(env, let) + else + infer_let_expr(env, let) + end + end + + def infer_decl(env, let) + env = env.clone + # handle the binding + if let.var.type + check(env, let.expr, let.var.type) + else + let.var.type = infer(env, let.expr) + end + env.set_type(let.var.name, let.var.type) + env[let.var.name][:value] = nil + let.type = :void + end + + def infer_let_expr(env, let) + env = env.clone + + # handle the binding + if let.var.type + check(env, let.expr, let.var.type) + else + let.var.type = infer(env, let.expr) + end + + env.add_sym(let.var.name, let.var.loc, :var, let.var.type) + let.type = infer(env, let.body) + end + + def infer_ifexpr(env, expr) + check(env, expr, infer(env, expr.then)) + end + + def infer_set(env, expr) + error(expr.loc, "infer_set unimplemented") + end + + def infer_func(env, expr) + env = env.clone + @typevar = 0 + expr.args.each do |a| + a.type ||= make_typevar() + env.add_sym(a.name, a.loc, :arg, a.type) + end + infer(env, expr.expr) + type = (expr.args + [expr.expr]).map {|v| v.type } + type.unshift(:void) if type.length == 1 + + # the body may have inferred an arg type, fix it up here + expr.args.each_with_index do |a,i| + a.type = env[a.name][:type] + type[i] = a.type + end + expr.type = type + end + + def infer_apply(env, expr) + if expr.func.is_a? String + expr.type = infer_opcall(env, expr) + else + type = infer(env, expr.func) + check_apply(env, expr, type) + end + end + + def assign_type(env, var, type) + if var.class == IR::Var and (var.type.nil? or var.type == String) then + var.type = type + env[var.name][:type] = type + end + end + + def infer_opcall(env, expr) + # infer the operand type first + vtype = infer(env, expr.args[0]) + if (not vtype or vtype.class == String) and expr.args.length == 2 + vtype = infer(env, expr.args[1]) + end + + # use the operand type to pick op type and check it + if expr.args.length == 1 + check_unary(env, expr, vtype) + + elsif expr.args.length == 2 + check_binary(env, expr, vtype) + else + error(expr.loc, "too many operands for operator '#{expr.func}'") + end + end + + def check_unary(env, expr, vtype) + optype = UnaryOps[expr.func][vtype] + error(expr.loc, "unknown unary operation '#{expr.func}' for operand type #{vtype}") if not optype + check_apply(env, expr, optype) + end + + def check_binary(env, expr, vtype) + optype = BinaryOps[expr.func][vtype] + error(expr.loc, "unknown binary operation '#{expr.func}' for operand type #{vtype}") if optype.nil? + + expr.args.each_with_index do |a, i| + assign_type(env, a, optype[i]) + end + check_apply(env, expr, optype) + end +end + + +class Parser + attr_accessor :exprs + attr_accessor :syms + + def initialize(path = nil) + @syms = SymTable.new + parse_file(path) + syms.add_sym(:void, 0, :type, :void) + syms.add_sym(:bool, 0, :type, :bool) + syms.add_sym(:int, 0, :type, :int) + syms.add_sym(:string, 0, :type, :string) + syms.add_sym(:float, 0, :type, :float) + parse_file(path) + end + + def parse_string(str) + @lex = Lexer.new() + @lex.parse_string(str) + @prev = nil + @next = nil + toplevel + end + + def parse_file(path) + @lex = Lexer.new(path) + @prev = nil + @next = nil + toplevel + end + + def toplevel + raise "toplevel() must be implemented by inheriting class" + end + + def error(str, loc = nil) + file, pos = (loc ? [@lex.file, loc] : [@lex.file, (@next || @prev).pos]) + $stderr.puts "#{file}:#{@lex.linenum(pos)}: #{str}" + exit 1 + end + + def peek() + @next = @lex.next if @next.nil? + @next + end + + def matches(type) + (peek().type == type) + end + + def matches_any(types) + types.any? {|type| peek().type == type } + end + + def accept(type) + if (matches(type)) + @prev = @next + @next = nil + true + else + false + end + end + + def expect(type) + tok = peek() + if not accept(type) + error("expected '#{type}', received '#{tok.type}'") + end + tok + end + + def consume() + expect(peek().type) + end + + def eof? + (peek().type == :eof) + end + + def location() + if @prev + [@prev.file, @prev.pos] + else + [@lex.file, 0] + end + end + + + + + + OPERATORS = { + "+" => true, + "-" => true, + "*" => true, + "/" => true, + "%" => true, + } + + def operator? + OPERATORS[peek().type] + end + + def toplevel + imports + exports + @defs = {} + while !matches(:eof) + mod_def = toplevel_defintion() + @defs[mod_def.name.text] = mod_def + +# if decl.class == IR::Let +# syms[decl.var.name][:value] = decl +# end +# @exprs << decl + end + +# @type_checker = TypeChecker.new(self) +# @exprs = [] +# @exprs = @exprs.compact +# @exprs.each do |e| +# @type_checker.infer(syms, e) +# end +# pp syms + pp @defs + end + + def imports + # TBD + end + + def exports + # TBD + end + + def toplevel_defintion + ident = identifier() + if matches("(") then + val = function_definition(ident) + elsif matches("=") then + val = constant_definition(ident) + elsif matches(":") then + val = type_annotation(ident) + elsif matches(:is) + val = type_definition(ident) + else + error("#{ident.name} is not a valid toplevel definition") + end + IR::Def.new(name.loc, nil, ident, val) + end + + def function_definition(name) + args = function_arglist() + func = IR::Func.new(name.loc, nil, args, []) + expect("{") + expect(:return); + if !matches("}") + func.body << IR::Return.new(location, nil, expression()) + end + expect("}"); + end + + def constant_definition(name) + error("constant definitions not yet supported") + end + + def type_definition(name) + error("type definitions not yet supported") + end + + def type_annotation(name) + error("type annotations not yet supported") + end + + + def function_arglist() + args = [] + expect("(") + while !matches(")") + args << identifier() + expect(",") if !matches(")") + end + expect(")") + args + end + + def identifier() + name = expect(:ident) + IR::Var.new(name.pos, nil, name.text.to_sym) + end + + def expression() + constant() + end + + def constant() + tok = consume() + if tok.type == :bool + IR::Const.new(tok.pos, :bool, tok.text == "true") + elsif tok.type == :string + IR::Const.new(tok.pos, :string, tok.text) + elsif tok.type == :int + IR::Const.new(tok.pos, :int, tok.text.to_i) + elsif tok.type == :float + IR::Const.new(tok.pos, :float, tok.text.to_f) + elsif tok.type == :void + IR::Const.new(tok.pos, :void, :void) + else + error("invalid constant") + end + end + + + +# def declaration() +# if matches(:ident) +# expr = ident() +# expr.type = syms[expr.name][:type] if syms[expr.name] +# if accept("=") +# value = expression() +# syms.add_sym(expr.name, expr.loc, :var, expr.type) +# IR::Let.new(expr.loc, nil, expr, value, nil) +# elsif accept(":") +# expr.type = type_spec() +# syms.add_sym(expr.name, expr.loc, :var, expr.type) +# nil +# else +# expression() +# end +# else +# expression() +# end +# end +# +# def expression() +# if matches(:let) +# let_expr() +# else +# complex_expr() +# end +# end +# +# def complex_expr() +# expr = nil +# if matches(:if) +# expr = if_expr() +# elsif matches(:set) +# expr = var_set() +# elsif matches_any(["+","-", "!"]) +# expr = unop_call() +# else +# expr = atomic_expr() +# if matches("(") +# expr = func_call(expr) +# elsif operator? +# expr = binop_call(expr) +# end +# end +# expr +# end +# +# def atomic_expr() +# if matches(:func) +# func_expr() +# elsif matches(:ident) +# ident() +# else +# constant() +# end +# end +# +# +# +# +# def application() +## expr = atomic_expr() +## expr = func_call(expr) if matches("(") +# # EQ, NEQ, '<', LTEQ, '>', GTEQ +# end +# +# def simple_expr() +# # '+', '-', OR +# end +# +# def term() +# # '*', '/', '%', AND +# end +# +# def factor() +# # '(', NOT, atomic +# end +# +# +# +# +# def unop_call() +# op = consume() +# rhs = atomic_expr() +# IR::Apply.new(op.pos, nil, op.type, [rhs]) +# end +# +# def binop_call(expr) +# op = consume() +# rhs = atomic_expr() +# IR::Apply.new(expr.loc, nil, op.type, [expr, rhs]) +# end +# +# def if_expr() +# loc = expect(:if).pos +# cond = atomic_expr() +# expect(:then) +# branch1 = expression() +# expect(:else) +# branch2 = expression() +# IR::If.new(loc, nil, cond, branch1, branch2) +# end +# +# def var_set() +# loc = expect(:set).pos +# name = ident() +# expect("=") +# expr = expression() +# IR::Set.new(loc, nil, name, expr) +# end +# +# def func_call(func) +# args = [] +# expect("(") +# while !matches(")") +# args << atomic_expr() +# expect(",") if not matches(")") +# end +# expect(")") +# IR::Apply.new(func.loc, nil, func, args) +# end +# +# def func_expr() +# loc = expect(:func).pos +# args = [] +# expect("(") +# while !matches(")") +# args << ident() +# args.last.type = type_spec() if accept(":") +# expect(",") if not matches(")") +# end +# expect(")") +# type = type_spec() if accept(":") +# body = expression() +# IR::Func.new(loc, type, args, body) +# end +# +# def ident() +# name = expect(:ident) +# IR::Var.new(name.pos, nil, name.text.to_sym) +# end +# +# def typename() +# name = expect(:ident) +# sym = syms[name.text] +# if sym && sym[:kind] != :type +# error(name.pos, "#{name.text} is not a valid type") +# end +# IR::Var.new(name.pos, nil, name.text.to_sym) +# end +# +# +# def let_expr() +# expect(:let) +# name = ident() +# name.type = type_spec() if accept(":") +# expect("=") +# expr = complex_expr() +# expect(:in) +# body = expression() +# IR::Let.new(name.loc, nil, name, expr, body) +# end +# +# def type_spec() +# type = [typename().name] +# while accept("->") +# type << typename().name +# end +# (type.length == 1 ? type[0] : type) +# end +end + +$parser = Parser.new("cerise.m") +pp $parser diff --git a/example.s b/example.s index c6a6227..0b42821 100644 --- a/example.s +++ b/example.s @@ -5,6 +5,9 @@ # byte 0x42 #end +var :foo, 2 +var :bar, 8, 3 + func :main do # locals 2 int 0 -- 2.52.0