From 97049f030c726fafe5c82d93ad71077b66a00e5f Mon Sep 17 00:00:00 2001 From: Mike Lowis Date: Mon, 3 Jun 2024 11:58:01 -0400 Subject: [PATCH] first commit. Basic compilation mechanics working --- .gitignore | 1 + build.sh | 6 + cerise-c.m | 24 + cerise-c.rb | 1218 +++++++++++++++++++++++++++++++++++++++++++++++++++ runtime.c | 115 +++++ runtime.h | 218 +++++++++ 6 files changed, 1582 insertions(+) create mode 100644 .gitignore create mode 100755 build.sh create mode 100644 cerise-c.m create mode 100755 cerise-c.rb create mode 100644 runtime.c create mode 100644 runtime.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9654987 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +cerise-c cerise-c.c diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..d5f553e --- /dev/null +++ b/build.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +./cerise-c.rb > cerise-c.c \ + && cc -I. -O2 -o cerise-c cerise-c.c runtime.c \ + && ./cerise-c + diff --git a/cerise-c.m b/cerise-c.m new file mode 100644 index 0000000..54064a2 --- /dev/null +++ b/cerise-c.m @@ -0,0 +1,24 @@ +TestLiterals() { + assert 42 + assert 42.0 + assert true + #assert false + #assert nil + assert "123" + return true +} + +TestBoolOps() { + assert true == true + assert false == false + assert true != false + assert true && true + assert true || false + assert false || true + return true +} + +init() { + TestLiterals() + return 42.0 +} diff --git a/cerise-c.rb b/cerise-c.rb new file mode 100755 index 0000000..c6bae56 --- /dev/null +++ b/cerise-c.rb @@ -0,0 +1,1218 @@ +#!/usr/bin/env ruby + +# TODO: +# +# * Add unary operators for integers +# * Add type checking to operators on constants +# * Add optimization of constant operators +# * Add increment/decrement optimization +# * Implement function calls +# * Implement string constants +# * Implement if statement +# * Implement tail call optimization? + +require 'stringio' +require 'strscan' + + +## +# Intermediate Representation Module +## + +module IR + Func = Struct.new(:loc, :type, :args, :locals, :body) + Return = Struct.new(:loc, :type, :value) + Assert = Struct.new(:loc, :type, :value) + Const = Struct.new(:loc, :type, :value) + Var = Struct.new(:loc, :type, :name) + Def = Struct.new(:loc, :type, :name, :value) + Set = Struct.new(:loc, :type, :name, :value) + Op = Struct.new(:loc, :type, :op, :left, :right) + Call = Struct.new(:loc, :type, :func, :args) + If = Struct.new(:loc, :type, :cond, :then, :else) +end + + +## +# Data Type Representation Module +## + +module Value + # Forms: :bool, :int, :float, :void, :array, :record, :string, :proc + Type = Struct.new(:form, :fields, :base, :size) + Field = Struct.new(:type, :offset) + + # Base type definitions + Void = Type.new(:void, nil, nil, 0) + Bool = Type.new(:bool, nil, nil, 1) + Int = Type.new(:int, nil, nil, 8) + Float = Type.new(:float, nil, nil, 8) + String = Type.new(:string, nil, nil, 8) +end + + +## +# Lexical Analyzer Definition +## + +class Lexer + Tok = Struct.new(:text, :file, :pos, :type) + + attr_accessor :file + attr_accessor :data + + def initialize(path = nil) + @file = (path || "") + @text = (path ? File.read(path) : "") + @data = StringScanner.new(@text) + end + + def parse_string(str) + @file = "" + @text = str + @data = StringScanner.new(@text) + end + + def linenum(pos = nil) + pos = [pos || @data.pos].flatten.last + @text[0..pos].count("\n") + 1 + end + + SPACE = /([ \t\v\n\r]+|#.*\n)/ + IDENT = /[_a-zA-Z][_a-zA-Z0-9]*/ + BRACES = /[\(\)\[\]\{\}\.]/ + OPERATORS = /[:,<>*\/=+\-\$?!%&|]+/ + INTEGER = /[0-9]+/ + FLOATING = /[0-9]+\.[0-9]+/ + STRING = /"(\\"|[^"])*"/ + ID_TYPES = { + "true" => :bool, + "false" => :bool, + "if" => :if, + "else" => :else, + "def" => :def, + "set" => :set, + "return" => :return, + "assert" => :assert, + } + + def next + while @data.skip(SPACE) do + end + if not @data.eos? + type = :eof + if @data.scan(IDENT) + type = (ID_TYPES[@data.matched] || :ident) + elsif @data.scan(FLOATING) + type = :float + elsif @data.scan(INTEGER) + type = :int + elsif @data.scan(STRING) + type = :string + elsif @data.scan(BRACES) + type = @data.matched + elsif @data.scan(OPERATORS) + type = @data.matched + end + tok = Tok.new(@data.matched, @file, @data.pos, type) if type + else + tok = Tok.new("EOF", @file, @data.pos, :eof) + end +# pp tok + tok + end +end + + +## +# Code Parsing Module +## + +class Parser + attr_accessor :syms + attr_accessor :checker + + def initialize(path = nil) + @syms = SymTable.new + @checker = TypeChecker.new(self) + syms.add_builtin(:void, 0, :type, :void) + syms.add_builtin(:bool, 0, :type, :bool) + syms.add_builtin(:int, 0, :type, :int) + syms.add_builtin(:string, 0, :type, :string) + syms.add_builtin(:float, 0, :type, :float) + parse_file(path) + end + + def parse_file(path) + @lex = Lexer.new(path) + @prev = nil + @next = nil + toplevel + end + + def error(str, loc = nil) + file, pos = (loc ? [@lex.file, loc] : [@lex.file, (@next || @prev).pos]) + $stderr.puts "#{file}:#{@lex.linenum(pos)}: #{str}" + exit 1 + end + + def peek() + @next = @lex.next if @next.nil? + @next + end + + def matches(type) + (peek().type == type) + end + + def matches_any(types) + types.any? {|type| peek().type == type } + end + + def accept(type) + if (matches(type)) + @prev = @next + @next = nil + true + else + false + end + end + + def expect(type) + tok = peek() + if not accept(type) + error("expected '#{type}', received '#{tok.type}'") + end + tok + end + + def consume() + expect(peek().type) + end + + def eof? + (peek().type == :eof) + end + + def location() + if @prev + [@prev.file, @prev.pos] + else + [@lex.file, 0] + end + end + + OPERATORS = { + "+" => true, + "-" => true, + "*" => true, + "/" => true, + "%" => true, + "<" => true, + ">" => true, + "<=" => true, + ">=" => true, + "==" => true, + "!=" => true, + "&&" => true, + "||" => true, + } + + def operator? + OPERATORS[peek().type] + end + + def toplevel + imports + exports + while !matches(:eof) + mod_def = toplevel_defintion() + if mod_def.value.is_a? IR::Const + kind = :const + elsif mod_def.value.is_a? IR::Var + kind = :var + elsif mod_def.value.is_a? IR::Func + kind = :func +# elsif mod_def.value.is_a? IR::Type +# kind = :type + else + raise "invalid toplevel definition #{mod_def.value.class}" + end + + syms.add_sym( + mod_def.name.name, + mod_def.name.loc, + kind, + mod_def.type, + mod_def.value + ) + end + end + + def imports + # TBD + end + + def exports + # TBD + end + + def toplevel_defintion + ident = identifier() + if matches("(") then + val = function_definition(ident) + elsif matches("=") then + val = constant_definition(ident) + else + error("#{ident.name} is not a valid toplevel definition") + end + IR::Def.new(ident.loc, nil, ident, val) + end + + def function_definition(name) + args = function_arglist() + func = IR::Func.new(name.loc, :void, args, [], []) + defs = [] +# if accept(":") +# func.type = expect(:ident).text.to_sym +# end + expect("{") + while matches(:def) + local = variable_definition() + func.locals << local.name.name + defs << local; + end + stmts = statement_list(["}", :return]) + stmts << return_statement + func.body = defs + stmts + expect("}"); + func + end + + def constant_definition(name) + error("constant definitions not yet supported") + end + + def function_arglist() + args = [] + expect("(") + while !matches(")") + id = identifier() +# expect(":") +# id.type = expect(:ident).text.to_sym + args << id + expect(",") if !matches(")") + end + expect(")") + args + end + + def identifier() + name = expect(:ident) + IR::Var.new(name.pos, nil, name.text.to_sym) + end + + + ## + # Statements + ## + def statement_list(terminators) + stmts = [] + while !matches_any(terminators) + stmts << statement + end + stmts + end + + def statement() + if matches(:set) + variable_assignment() + elsif matches(:assert) + assert_statement() + elsif matches(:if) + if_statement() + else + expression() + end + end + + def return_statement() + loc = location + expect(:return); + if !matches("}") + IR::Return.new(loc, nil, expression()) + else + IR::Return.new(loc, nil, nil) + end + end + + def variable_definition() + loc = location + expect(:def) + name = identifier() + expect ("=") + value = expression() + IR::Def.new(loc, nil, name, value) + end + + def variable_assignment() + loc = location + expect(:set) + name = identifier() + expect ("=") + value = expression() + IR::Set.new(loc, nil, name, value) + end + + def assert_statement() + loc = location + expect(:assert); + IR::Assert.new(loc, nil, expression()) + end + + + def if_statement() + loc = location + expect(:if) + cond = expression() + then_branch = statement_block() + if accept(:else) + if matches(:if) + else_branch = [ if_statement() ] + else + else_branch = statement_block() + end + end + IR::If.new(loc, nil, cond, then_branch, else_branch) + end + + def statement_block() + loc = location + stmts = [] + expect("{") + while !matches("}") + stmts << statement() + end + expect("}") + return stmts + end + + + ## + # Expressions + ## + + def expression() + left = logical_expr() + if matches_any(["&&", "||"]) + loc = location + op = consume() + right = logical_expr() + left = make_binop(loc, op.text, left, right) + end + left + end + + + def logical_expr() + left = simple_expr() + if matches_any(["==", "!=", "<", "<=", ">", ">="]) + loc = location + op = consume() + right = simple_expr() + left = make_binop(loc, op.text, left, right) + end + left + end + + def simple_expr() + # Handle unary ops first + if matches_any(["+", "-", :not]) + loc = location + op = consume() + left = term() + left = make_unop(location, op.text, left, nil) + else + left = term() + end + + # now look for binary ops + while matches_any(["+", "-", :or]) + loc = location + op = consume() + right = term() + left = make_binop(loc, op.text, left, right) + end + left + end + + def term() + left = factor() + while matches_any(["*", "/", "%", :and]) + loc = location + op = consume() + right = factor(); + left = make_binop(loc, op.text, left, right) + end + left + end + + def make_binop(loc, op, left, right) + node = IR::Op.new(loc, nil, op, left, right) +# checker.infer(syms, node) + node + end + + def make_unop(loc, op, value) + node = IR::Op.new(loc, nil, op, value, nil) +# checker.infer(syms, node) + node + end + + def factor() + if accept("(") + expr = expression() + expect(")") + else + expr = const_or_ident() + end + + # Check for function call + if matches("(") + expr = func_call(expr) + end + expr + end + + def func_call(expr) + call = IR::Call.new(location, nil, expr, []) + expect("(") + while !matches(")") + call.args << expression() + expect(",") if !matches(")") + end + expect(")") + call + end + + def const_or_ident() + tok = consume() + if tok.type == :bool + IR::Const.new(tok.pos, Value::Bool, tok.text == "true") + elsif tok.type == :string + IR::Const.new(tok.pos, Value::String, tok.text) + elsif tok.type == :int + IR::Const.new(tok.pos, Value::Int, tok.text.to_i) + elsif tok.type == :float + IR::Const.new(tok.pos, Value::Float, tok.text.to_f) + elsif tok.type == :void + IR::Const.new(tok.pos, Value::Void, :void) + elsif tok.type == :ident + IR::Var.new(tok.pos, nil, tok.text.to_sym) + else + error("invalid constant #{tok}") + end + end +end + + +## +# Symbol Table Definition +## + +class SymTable + Symbol = Struct.new(:name, :loc, :kind, :type, :value) + + def initialize() + @scopes = [{}, {}] + end + + def builtin?(name) + (not local? name) and (not (@scopes[0] || {})[name].nil?) + end + + def global?(name) + (not local? name) and (not (@scopes[1] || {})[name].nil?) + end + + def parameter?(name) + (not local? name) and (not (@scopes[2] || {})[name].nil?) + end + + def local?(name) + find_sym(@scopes[3..-1], name) + end + + def find_sym(scopes, name) + scopes.reverse_each.detect do |scope| + return scope[name] + end + end + + def open_scope() + @scopes.push({}) + end + + def close_scope() + @scopes.pop + end + + def reset_scope() + @scopes = @scopes[0..1] + end + + def [](name) + find_sym(@scopes, name) + end + + def add_builtin(name, kind, type, value) + @scopes[0][name] = + Symbol.new(name, 0, kind, type, value) + end + + def add_sym(name, loc, kind, type, value) + @scopes.last[name] = + Symbol.new(name, loc, kind, type, value) + end + + def each(&block) + @scopes.last.each(&block) + end +end + + +## +# Type Checking/Inference Module +## + +class TypeChecker + BaseToTypes = { + :bool => Value::Bool, + :int => Value::Int, + :float => Value::Float, +# :string => Value::String, + } + + TypesToBase = { + Value::Bool => :bool, + Value::Int => :int, + Value::Float => :float, + } + + UnaryOps = { + "+" => { + int: [:int, :int], + float: [:float, :float], + }, + "-" => { + int: [:int, :int], + float: [:float, :float], + }, + "!" => { + bool: [:bool, :bool] + }, + } + + BinaryOps = { + "+" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "-" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "*" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "/" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "%" => { + int: [:int, :int, :int], +# Float modulo normally implemented as library function +# float: [:float, :float, :float], + }, + "<" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + }, + ">" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + }, + "<=" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + }, + ">=" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + bool: [:bool, :bool, :bool], + }, + "==" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + bool: [:bool, :bool, :bool], + string: [:string, :string, :bool], + }, + "!=" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + bool: [:bool, :bool, :bool], + string: [:string, :string, :bool], + }, + "&&" => { bool: [:bool, :bool, :bool] }, + "||" => { bool: [:bool, :bool, :bool] }, + "<<" => { int: [:int, :int, :int] }, + ">>" => { int: [:int, :int, :int] }, + "&" => { int: [:int, :int, :int] }, + "^" => { int: [:int, :int, :int] }, + "|" => { int: [:int, :int, :int] }, + } + + def initialize(parser) + @parser = parser + end + + def error(loc, msg) + @parser.error(msg, loc) + end + + def check(env, expr, type) + if expr.is_a? IR::Const + check_const(env, expr, type) + elsif (expr.is_a? IR::Op) + check_op(env, expr, type) + elsif expr.is_a? IR::Var + check_var(env, expr, type) +# elsif expr.is_a? IR::Let +# check_let(env, expr, type) +# elsif expr.is_a? IR::If +# check_ifexpr(env, expr, type) +# elsif expr.is_a? IR::Set +# check_set(env, expr, type) +# elsif expr.is_a? IR::Func +# check_func(env, expr, type) +# elsif expr.is_a? IR::Apply +# check_apply(env, expr, type) + else + etype = infer(env, expr) + if type != etype + error(expr.loc, "expected #{type}, received #{etype}") + end + end + expr.type = type + end + + def infer(env, expr) + if expr.is_a? IR::Func + infer_func(env, expr) + elsif expr.is_a? IR::Return + infer_return(env, expr) + elsif expr.is_a? IR::Const + infer_const(env, expr) + elsif expr.is_a? IR::Var + infer_var(env, expr) + elsif expr.is_a? IR::Def + infer_def(env, expr) + elsif expr.is_a? IR::Set + infer_set(env, expr) + elsif expr.is_a? IR::Op + infer_op(env, expr) + elsif expr.is_a? IR::Call + infer_call(env, expr) + elsif expr.is_a? IR::If + infer_if(env, expr) + else + pp expr + error(expr.loc, "unable to determine type of expression") + end + end + + private + + ## + # TYPE INFERENCE + ## + + def infer_func(env, expr) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def infer_return(env, expr) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def infer_const(env, expr) + expr.type + end + + def infer_var(env, expr) + if env[expr.name].nil? + error(expr.loc, "symbol '#{expr.name}' not defined") + end + expr.type = env[expr.name][:type] + end + + def infer_def(env, expr) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def infer_set(env, expr) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def infer_op(env, expr) + # infer the operand type first + vtype = infer(env, expr.left) + if (expr.left and expr.right) + check_binary(env, expr, vtype) + else + check_unary(env, expr, vtype) + end + end + + def infer_call(env, expr) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def infer_if(env, expr) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + ## + # TYPE CHECKING + ## + + + def check_func(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_return(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_const(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") +# expr, type.type + end + + def check_var(env, expr, type) + etype = infer(env, expr) + if (etype.class == String) + expr.type = type + env.set_type(expr.name, type) + elsif expr.type != type + error(expr.loc, "expected #{type}, received #{etype}") + end + type + end + + def check_def(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_set(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_op(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_call(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_if(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + + +# def make_typevar() +# @typevar ||= 0 +# var = "abcdefghijklmnopqrstuvwxyz"[@typevar] +# @typevar += 1 +# var +# end +# +# def var?(expr) +# expr.class == IR::Var +# end +# +# def untyped_global_func?(env, func, type) +# type.nil? and +# var?(func) and +# env.global?(func.name) and +# env[func.name][:value] +# end +# +# def check_apply(env, expr, type) +# # Handle global functions that haven't been typed yet but are +# # being called. We pause to infer their type. +# if untyped_global_func?(env, expr.func, type) +# value = env[expr.func.name][:value] +# env[expr.func.name][:value] = nil +# infer(@parser.syms, value) +# type = infer(env, expr.func) +# end +# +# error(expr.loc, "object being applied is not a function (has type: #{type.to_s})") if not type.is_a? Array +# error(expr.loc, "wrong number of arguments to function call") if (type.length - 1) != expr.args.length +# type[0..-2].each_with_index do |t,i| +# check(env, expr.args[i], t) +# end +# expr.type = type.last +# end +# +# def check_ifexpr(env, expr, type) +# check(env, expr.cond, :bool) +# check(env, expr.then, type) +# check(env, expr.else, type) +# end +# +# +# def verify(cond, loc, msg) +# error(loc, msg) if not cond +# end +# +# def check_func(env, expr, type) +# env = env.clone +# verify((type.length-1) == expr.args.length, expr.loc, +# "incorrect number of arguments (#{expr.args.length}, expected #{(type.length-1)}") +# expr.args.each_with_index do |a,i| +# env.add_sym(a.name, a.loc, :arg, type[i]) +# end +# check(env, expr.expr, type.last) +# end +# +# def check_let(env, let, type) +# env = env.clone +# env.add_sym(let.var.name, let.var.loc, :var, let.var.type) +# check(env, let.expr, type) +# end + +# +# def infer_let(env, let) +# if let.body.nil? +# infer_decl(env, let) +# else +# infer_let_expr(env, let) +# end +# end +# +# def infer_decl(env, let) +# env = env.clone +# # handle the binding +# if let.var.type +# check(env, let.expr, let.var.type) +# else +# let.var.type = infer(env, let.expr) +# end +# env.set_type(let.var.name, let.var.type) +# env[let.var.name][:value] = nil +# let.type = :void +# end +# +# def infer_let_expr(env, let) +# env = env.clone +# +# # handle the binding +# if let.var.type +# check(env, let.expr, let.var.type) +# else +# let.var.type = infer(env, let.expr) +# end +# +# env.add_sym(let.var.name, let.var.loc, :var, let.var.type) +# let.type = infer(env, let.body) +# end +# +# def infer_ifexpr(env, expr) +# check(env, expr, infer(env, expr.then)) +# end +# +# def infer_set(env, expr) +# error(expr.loc, "infer_set unimplemented") +# end +# +# def infer_func(env, expr) +# env = env.clone +# @typevar = 0 +# expr.args.each do |a| +# a.type ||= make_typevar() +# env.add_sym(a.name, a.loc, :arg, a.type) +# end +# infer(env, expr.expr) +# type = (expr.args + [expr.expr]).map {|v| v.type } +# type.unshift(:void) if type.length == 1 +# +# # the body may have inferred an arg type, fix it up here +# expr.args.each_with_index do |a,i| +# a.type = env[a.name][:type] +# type[i] = a.type +# end +# expr.type = type +# end +# +# def infer_apply(env, expr) +# if expr.func.is_a? String +# expr.type = infer_opcall(env, expr) +# else +# type = infer(env, expr.func) +# check_apply(env, expr, type) +# end +# end +# +# def assign_type(env, var, type) +# if var.class == IR::Var and (var.type.nil? or var.type == String) then +# var.type = type +# env[var.name][:type] = type +# end +# end + + + def check_unary(env, expr, vtype) + optype = UnaryOps[expr.op][TypesToBase[vtype]] + error(expr.loc, "unknown unary operation '#{expr.op}' for operand type #{vtype}") if not optype + optype = optype.map{|v| BaseToTypes[v] } + check(env, expr.left, optype[0]) + expr.type = optype[-1] + end + + def check_binary(env, expr, vtype) + pp expr + optype = BinaryOps[expr.op][TypesToBase[vtype]] + error(expr.loc, "unknown binary operation '#{expr.op}' for operand type #{vtype}") if optype.nil? + optype = optype.map{|v| BaseToTypes[v] } + check(env, expr.left, optype[0]) + check(env, expr.right, optype[1]) + expr.type = optype[-1] + end +end + + +## +# Code Generation Module +## + +module Codegen + State = Struct.new(:label, :locals, :syms, :temp, :indent) + + def self.init(syms) + State.new(0, [], syms, 0, 1) + end + + def self.putln(state, msg) + puts (" " * state.indent) + msg + end + + def self.mktemp(state) + name = "_t#{state.temp}" + state.temp += 1 + name + end + + def self.genlabel(state) + label = state.label + state.label = state.label + 1 + end + + def self.emit(state, v) + if v.is_a? IR::Return then + emit_return(state, v) + elsif v.is_a? IR::Const then + emit_const(state, v) + elsif v.is_a? IR::Op then + emit_binop(state, v) + elsif v.is_a? IR::Var then + emit_var(state, v) + elsif v.is_a? IR::Call then + emit_call(state, v) + elsif v.is_a? IR::Def then + emit_def(state, v) + elsif v.is_a? IR::Set then + emit_set(state, v) + elsif v.is_a? IR::Assert then + emit_assert(state, v) + elsif v.is_a? IR::If then + emit_if(state, v) + else + raise "code emitting of #{v.class} not implemented" + end + end + + def self.emit_return(state, v) + temp = emit(state, v.value) + putln state, "return #{temp};" + end + + def self.emit_const(state, v) + var = mktemp(state) + if v.value.is_a? Integer + putln state, "Value #{var} = MakeInt(#{v.value});" + elsif v.value.is_a? Float + putln state, "Value #{var} = MakeReal(#{v.value});" + elsif v.value == true || v.value == false + putln state, "Value #{var} = MakeBool(#{v.value});" + elsif v.value.is_a? String + putln state, "Value #{var} = MakeString(#{v.value});" + else + raise "code emitting constants of this type not implemented" + end + var + end + + def self.emit_binop(state, v) + if v.op == "&&" + raise "logical and" + elsif v.op == "||" + raise "logical or" + else + lvar = emit(state, v.left) + rvar = emit(state, v.right) + result = mktemp(state); + case v.op + when "+" + putln state, "Value #{result} = OpAdd(#{lvar}, #{rvar});" + when "-" + putln state, "Value #{result} = OpSub(#{lvar}, #{rvar});" + when "*" + putln state, "Value #{result} = OpMul(#{lvar}, #{rvar});" + when "/" + putln state, "Value #{result} = OpDiv(#{lvar}, #{rvar});" + when "%" + putln state, "Value #{result} = OpMod(#{lvar}, #{rvar});" + when "<" + putln state, "Value #{result} = OpLt(#{lvar}, #{rvar});" + when "<=" + putln state, "Value #{result} = OpLtEq(#{lvar}, #{rvar});" + when ">" + putln state, "Value #{result} = OpGt(#{lvar}, #{rvar});" + when ">=" + putln state, "Value #{result} = OpGte(#{lvar}, #{rvar});" + when "==" + putln state, "Value #{result} = OpEq(#{lvar}, #{rvar});" + when "!=" + putln state, "Value #{result} = OpNeq(#{lvar}, #{rvar});" + + # when "&&" + # putln state, "Value #{result} = OpAnd(#{lvar}, #{rvar});" + # + # when "||" + # putln state, "Value #{result} = OpOr(#{lvar}, #{rvar});" + + else + raise "not implemented" + end + end + result + end + + def self.emit_var(state, v) + pp v + var = state.syms[v.name] + if var.nil? + raise "no such variable: #{v.name}" + end + var + end + + def self.emit_call(state, v) + result = mktemp(state) + vars = v.args.reverse.map do |arg| + emit(state, arg) + end.join(", ") + putln state, "Value #{result} = #{v.func.name}(#{vars});" + result + end + + def self.emit_def(state, v) + state.syms.add_sym( + v.name.name, v.loc, :local, v.type, v.value) + temp = emit(state, v.value) + raise "invalid local definition: #{v.name.name}" if not state.locals.index(v.name.name) + putln state, "Value #{v.name.name} = #{temp};" + end + + def self.emit_set(state, v) + temp = emit(state, v.value) + raise "invalid local definition: #{v.name.name}" if not state.locals.index(v.name.name) + putln state, "#{v.name.name} = #{temp};" + end + + def self.emit_assert(state, v) + temp = emit(state, v.value) + putln state, "Assert(__FILE__, __LINE__, #{temp});" + end + + def self.emit_if(state, v) + cond = emit(state, v.cond) + putln state, "if (#{cond}) {" + emit_block(state, v.then) + putln state, "} else {" + emit_block(state, v.else) + putln state, "}" + end + + def self.emit_block(state, v) + return if v.nil? + state.indent += 1 + v.each do |v| + emit(state, v); + end + state.indent -= 1 + end +end + + +## +# Emit the Code +## + +$parser = Parser.new("cerise-c.m") +$checker = TypeChecker.new($parser) +state = Codegen.init($parser.syms) + +puts "#include \n\n" + +$parser.syms.each do |name, val| + args = val.value.args.map{|e| "Value #{e.name}" }.join(", ") + puts "Value #{name}(#{args});" +end +puts "" +$parser.syms.each do |name, val| + args = val.value.args.map{|e| "Value #{e.name}" }.join(", ") + puts "Value #{name}(#{args}) {" + $parser.syms.open_scope + state.locals = val.value.locals + val.value.args.each_with_index do |name, idx| + $parser.syms.add_sym( + name.name, name.loc, :param, name.type, idx) + end + val.value.body.each do |stmnt| + Codegen.emit(state, stmnt) + end + puts "}\n\n" + $parser.syms.close_scope +end diff --git a/runtime.c b/runtime.c new file mode 100644 index 0000000..8b86902 --- /dev/null +++ b/runtime.c @@ -0,0 +1,115 @@ +/** + + Immediate Tag Values: + Nil { 0x00 + Undefined { 0x01 + False { 0x02 + True { 0x03 + / 0x04 + Unused { + \ 0x07 + + NaN Tag Values: + Object { 0000:PPPP:PPPP:PPPP + Array { 0001:PPPP:PPPP:PPPP + String { 0002:PPPP:PPPP:PPPP + Block { 0003:PPPP:PPPP:PPPP + NativeBlock { 0004:PPPP:PPPP:PPPP + Symbol { 0005:0000:IIII:IIII + Integer { 0006:0000:IIII:IIII + / 0007:****:****:**** + Double { ... + \ FFFF:****:****:**** + +*/ + +#include + +/* Start The Program + *************************************************/ + +int main(int argc, char** argv) +{ + String *str1, *str2; + Value val; + + val = MakeNil(); + assert(IsNil(val)); + assert(!IsFalse(val)); + assert(!IsTrue(val)); + assert(!IsBool(val)); + assert(!IsNumber(val)); + assert(!IsInt(val)); + assert(!IsReal(val)); + assert(!IsString(val)); + + val = MakeBool(true); + assert(!IsNil(val)); + assert(!IsFalse(val)); + assert(IsTrue(val)); + assert(IsBool(val)); + assert(!IsNumber(val)); + assert(!IsInt(val)); + assert(!IsReal(val)); + assert(!IsString(val)); + + val = MakeBool(false); + assert(!IsNil(val)); + assert(IsFalse(val)); + assert(!IsTrue(val)); + assert(IsBool(val)); + assert(!IsNumber(val)); + assert(!IsInt(val)); + assert(!IsReal(val)); + assert(!IsString(val)); + + val = MakeInt(42); + assert(!IsNil(val)); + assert(!IsFalse(val)); + assert(!IsTrue(val)); + assert(!IsBool(val)); + assert(IsNumber(val)); + assert(IsInt(val)); + assert(!IsReal(val)); + assert(!IsString(val)); + + val = MakeReal(42.0); + assert(!IsNil(val)); + assert(!IsFalse(val)); + assert(!IsTrue(val)); + assert(!IsBool(val)); + assert(IsNumber(val)); + assert(!IsInt(val)); + assert(IsReal(val)); + assert(!IsString(val)); + + val = MakeString("Hi!"); + assert(!IsNil(val)); + assert(!IsFalse(val)); + assert(!IsTrue(val)); + assert(!IsBool(val)); + assert(!IsNumber(val)); + assert(!IsInt(val)); + assert(!IsReal(val)); + assert(IsString(val)); + assert(!strcmp(ValueAsString(val)->bytes, "Hi!")); + + val = ToString(MakeNil()); + assert(!strcmp(ValueAsString(val)->bytes, "nil")); + + val = ToString(MakeBool(true)); + assert(!strcmp(ValueAsString(val)->bytes, "true")); + + val = ToString(MakeBool(false)); + assert(!strcmp(ValueAsString(val)->bytes, "false")); + + val = ToString(MakeInt(42)); + assert(!strcmp(ValueAsString(val)->bytes, "42")); + + val = ToString(MakeReal(42.5)); + assert(!strcmp(ValueAsString(val)->bytes, "42.500000")); + + extern Value init(void); + (void)init(); + return 0; +} diff --git a/runtime.h b/runtime.h new file mode 100644 index 0000000..20fb048 --- /dev/null +++ b/runtime.h @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#if (UINTPTR_MAX == 0xFFFFFFFFu) +#error "32-bit machines are not supported at this time" +#endif + +//# Nil X +//# Bool X +//# Int X +//# Real X +//# Record . +//# Array +//# Hashmap +//# String + + + +//#define VALUE_NIL 0x0llu +//#define VALUE_UNDEFINED 0x1llu +//#define VALUE_FALSE 0x2llu +//#define VALUE_TRUE 0x3llu + +//#define NAN_TAG_RECORD 0x0000000000000000llu +//#define NAN_TAG_ARRAY 0x0001000000000000llu +#define NAN_TAG_STRING 0x0002000000000000llu +//#define NAN_TAG_BLOCK 0x0003000000000000llu +//#define NAN_TAG_NATIVE 0x0004000000000000llu +//#define NAN_TAG_SYMBOL 0x0005000000000000llu +#define NAN_TAG_INTEGER 0x0006000000000000llu +#define NAN_TAG_DOUBLE 0x0007000000000000llu + +#define MASK_POINTER 0x0000fffffffffffcllu +#define HIGH16_TAG 0xffff000000000000llu +//#define IMM_TAG 0x0000000000000003llu +//#define TAG_BITS (HIGH16_TAG | IMM_TAG) + + +#define VALUE_NIL 0x0llu +#define VALUE_UNUSED 0x1llu +#define VALUE_FALSE 0x2llu +#define VALUE_TRUE 0x3llu + +typedef union { + uint64_t as_uint64; + double as_double; +} Value; + +typedef struct { + uint64_t length; + char bytes[]; +} String; + + +/* Value Tests + *************************************************/ + +static inline bool IsNil(Value val) { + return val.as_uint64 == VALUE_NIL; +} + +static inline bool IsFalse(Value val) { + return val.as_uint64 == VALUE_FALSE; +} + +static inline bool IsTrue(Value val) { + return val.as_uint64 == VALUE_TRUE; +} + +static inline bool IsBool(Value val) { + return (IsTrue(val) || IsFalse(val)); +} + +static inline bool IsNumber(Value val) { + return val.as_uint64 >= NAN_TAG_INTEGER; +} + +static inline bool IsInt(Value val) { + return (val.as_uint64 & HIGH16_TAG) == NAN_TAG_INTEGER; +} + +static inline bool IsReal(Value val) { + return IsNumber(val) && !IsInt(val); +} + +static inline bool IsString(Value val) { + return (val.as_uint64 & HIGH16_TAG) == NAN_TAG_STRING; +} + + +/* C Type Conversions + *************************************************/ + +static inline int32_t ValueAsInt(Value val) { + assert(IsInt(val)); + return (int32_t)val.as_uint64; +} + +static inline double ValueAsReal(Value val) { + assert(IsReal(val)); + val.as_uint64 -= NAN_TAG_DOUBLE; + return val.as_double; +} + +static String* ValueAsString(Value val) { + assert(IsString(val)); + return (String*)(val.as_uint64 & MASK_POINTER); +} + + +//static Array* to_array(Value val) { +// assert(is_array(val)); +// return (Array*)(val.as_pointer); +//} +// +// +//static Block* to_block(Value val) { +// assert(is_block(val)); +// return (Block*)(val.as_pointer); +//} + +//static NativeBlock* to_native(Value val) { +// assert(is_native(val)); +// return (NativeBlock*)(val.as_pointer); +//} +// +//static Symbol* to_symbol(Value val) { +// assert(is_symbol(val)); +// return (Symbol*)(val.as_pointer); +//} + + +/* Value Constructors + *************************************************/ + +static inline Value MakeNil(void) { + return (Value){ .as_uint64 = VALUE_NIL }; +} + +static inline Value MakeBool(bool b) { + return (Value){ .as_uint64 = b ? VALUE_TRUE : VALUE_FALSE }; +} + +static inline Value MakeInt(int32_t i) { + return (Value){ .as_uint64 = NAN_TAG_INTEGER | (uint32_t)i }; +} + +static inline Value MakeReal(double d) { + Value val = { .as_double = d }; + val.as_uint64 += NAN_TAG_DOUBLE; + assert(IsReal(val)); + return val; +} + +static inline Value MakeString(char* s) { + size_t sz = strlen(s); + String* str = calloc(sizeof(String) + sz + 1, 1); + str->length = sz; + strncpy(str->bytes, s, sz+1); + return (Value){ + .as_uint64 = (NAN_TAG_STRING | (uint64_t)str) + }; +} + +//static inline Value MakeRecord(int32_t nslots) { +// Value* record = calloc(nslots+1,sizeof(Value)); +// record[0] = MakeInt(nslots); +// return (Value){ .as_uint64 = (NAN_TAG_RECORD | (uintptr_t)record) }; +//} +// +//static inline Value MakeArray(int32_t nslots) { +// Value* record = calloc(2,sizeof(Value)); +// record[0] = MakeInt(1); +// record[1] = MakeInt(nslots); // +// record[2] = MakeInt(nslots); // +// return (Value){ .as_uint64 = (NAN_TAG_RECORD | (uintptr_t)record) }; +//} + +/* Value Type Conversions + *************************************************/ + +static Value ToString(Value val) { + Value retval; + if (IsString(val)) { + retval = val; + } else if (IsNil(val)) { + retval = MakeString("nil"); + } else if (IsBool(val)) { + retval = MakeString(IsTrue(val) ? "true" : "false"); + } else if (IsInt(val)) { + char str[64]; + snprintf(str, sizeof(str), "%d", ValueAsInt(val)); + retval = MakeString(str); + } else if (IsReal(val)) { + char str[64]; + snprintf(str, sizeof(str), "%f", ValueAsReal(val)); + retval = MakeString(str); + } else { + assert(!"could not convert string"); + } + return retval; +} + +/* Builtin Functions + *************************************************/ + +static void Assert(char* file, int lineno, Value val) { + if (IsFalse(val) || IsNil(val)) { + fprintf(stderr, "%s:%d:error: Assertion failed\n", file, lineno); + exit(1); + } +} -- 2.54.0