From 7a44e5d91951a21e3f1115bef26df3aa478a6bf7 Mon Sep 17 00:00:00 2001 From: Mike Lowis Date: Wed, 16 Oct 2024 15:56:12 -0400 Subject: [PATCH] started implementing type checker --- cerise-c.m | 14 +- cerise-c.rb | 1401 +------------------------------------------ lib/codegen.rb | 282 +++++++++ lib/compiler.rb | 11 + lib/ir.rb | 15 + lib/lexer.rb | 71 +++ lib/parser.rb | 465 ++++++++++++++ lib/sym_table.rb | 67 +++ lib/type_checker.rb | 445 ++++++++++++++ lib/value.rb | 15 + 10 files changed, 1393 insertions(+), 1393 deletions(-) create mode 100644 lib/codegen.rb create mode 100644 lib/compiler.rb create mode 100644 lib/ir.rb create mode 100644 lib/lexer.rb create mode 100644 lib/parser.rb create mode 100644 lib/sym_table.rb create mode 100644 lib/type_checker.rb create mode 100644 lib/value.rb diff --git a/cerise-c.m b/cerise-c.m index 4681534..8c321a3 100644 --- a/cerise-c.m +++ b/cerise-c.m @@ -2,12 +2,12 @@ module TestSuite TestLiterals() { - assert 42 - assert 42.0 +# assert 42 +# assert 42.0 assert true #assert false #assert nil - assert "123" +# assert "123" } TestBoolOps() @@ -81,10 +81,10 @@ TestStringOps() assert "foo" == "foo" assert "foo" != "bar" assert ("foo" + "bar") == "foobar" - assert ("foo" + 123) == "foo123" - assert ("foo" + 123.0) == "foo123.000000" - assert ("foo" + true) == "footrue" - assert ("foo" + false) == "foofalse" +# assert ("foo" + 123) == "foo123" +# assert ("foo" + 123.0) == "foo123.000000" +# assert ("foo" + true) == "footrue" +# assert ("foo" + false) == "foofalse" assert Length("foo") == 3 } diff --git a/cerise-c.rb b/cerise-c.rb index 2bceafe..35ef595 100755 --- a/cerise-c.rb +++ b/cerise-c.rb @@ -6,6 +6,12 @@ # * Add Import/Export statements # * Add constant definitions # * Add type definitions +# * Add/Sub for arrays +# * for/in statement for arrays +# * for/in statement for dictionaries +# +# * Implement sets +# * for/in statement for sets # # * Add unary operators for integers # * Add type checking to operators on constants @@ -16,1390 +22,13 @@ require 'stringio' require 'strscan' +require_relative 'lib/ir' +require_relative 'lib/value' +require_relative 'lib/lexer' +require_relative 'lib/parser' +require_relative 'lib/sym_table' +require_relative 'lib/type_checker' +require_relative 'lib/codegen' +require_relative 'lib/compiler' -## -# Intermediate Representation Module -## - -module IR - Func = Struct.new(:loc, :type, :args, :locals, :body) - Return = Struct.new(:loc, :type, :value) - Assert = Struct.new(:loc, :type, :value) - Const = Struct.new(:loc, :type, :value) - Var = Struct.new(:loc, :type, :name) - Def = Struct.new(:loc, :type, :name, :value) - Set = Struct.new(:loc, :type, :name, :value) - Op = Struct.new(:loc, :type, :op, :left, :right) - Call = Struct.new(:loc, :type, :func, :args) - If = Struct.new(:loc, :type, :cond, :then, :else) -end - - -## -# Data Type Representation Module -## - -module Value - # Forms: :bool, :int, :float, :void, :array, :record, :string, :proc - Type = Struct.new(:form, :fields, :base, :size) - Field = Struct.new(:type, :offset) - - # Base type definitions - Void = Type.new(:void, nil, nil, 0) - Bool = Type.new(:bool, nil, nil, 1) - Int = Type.new(:int, nil, nil, 8) - Float = Type.new(:float, nil, nil, 8) - String = Type.new(:string, nil, nil, 8) -end - - -## -# Lexical Analyzer Definition -## - -class Lexer - Tok = Struct.new(:text, :file, :pos, :type) - - attr_accessor :file - attr_accessor :data - - def initialize(path = nil) - @file = (path || "") - @text = (path ? File.read(path) : "") - @data = StringScanner.new(@text) - end - - def parse_string(str) - @file = "" - @text = str - @data = StringScanner.new(@text) - end - - def linenum(pos = nil) - pos = [pos || @data.pos].flatten.last - @text[0..pos].count("\n") + 1 - end - - SPACE = /([ \t\v\n\r]+|#.*\n)/ - IDENT = /[_a-zA-Z][_a-zA-Z0-9]*/ - BRACES = /[\(\)\[\]\{\}\.]/ - OPERATORS = /[:,<>*\/=+\-\$?!%&|]+/ - INTEGER = /[0-9]+/ - FLOATING = /[0-9]+\.[0-9]+/ - STRING = /"(\\"|[^"])*"/ - ID_TYPES = { - "true" => :bool, - "false" => :bool, - "if" => :if, - "else" => :else, - "def" => :def, - "set" => :set, - "return" => :return, - "assert" => :assert, - "module" => :module - } - - def next - while @data.skip(SPACE) do - end - if not @data.eos? - type = :eof - if @data.scan(IDENT) - type = (ID_TYPES[@data.matched] || :ident) - elsif @data.scan(FLOATING) - type = :float - elsif @data.scan(INTEGER) - type = :int - elsif @data.scan(STRING) - type = :string - elsif @data.scan(BRACES) - type = @data.matched - elsif @data.scan(OPERATORS) - type = @data.matched - end - tok = Tok.new(@data.matched, @file, @data.pos, type) if type - else - tok = Tok.new("EOF", @file, @data.pos, :eof) - end -# pp tok - tok - end -end - - -## -# Code Parsing Module -## - -class Parser - attr_accessor :syms - attr_accessor :checker - attr_reader :path - attr_reader :module - - def initialize(path = nil) - @path = path - @syms = SymTable.new - @checker = TypeChecker.new(self) - - syms.add_builtin(:Length, :func, [:any, :int], nil) - - -# syms.add_builtin(:void, 0, :type, :void) -# syms.add_builtin(:bool, 0, :type, :bool) -# syms.add_builtin(:int, 0, :type, :int) -# syms.add_builtin(:string, 0, :type, :string) -# syms.add_builtin(:float, 0, :type, :float) - -# syms.add_builtin(:Error, :func, [:any, :void], nil) - - parse_file(path) - end - - def symbols() - @syms.globals.map do |k,v| - [k, {kind: v.kind, type: v.type}] - end.to_h - end - - def linenum(pos) - @lex.linenum(pos) - end - - def parse_file(path) - @lex = Lexer.new(path) - @prev = nil - @next = nil - toplevel - end - - def error(str, loc = nil) - file, pos = (loc ? [@lex.file, loc] : [@lex.file, (@next || @prev).pos]) - $stderr.puts "#{file}:#{@lex.linenum(pos)}: #{str}" - exit 1 - end - - def peek() - @next = @lex.next if @next.nil? - @next - end - - def matches(type) - (peek().type == type) - end - - def matches_any(types) - types.any? {|type| peek().type == type } - end - - def accept(type) - if (matches(type)) - @prev = @next - @next = nil - true - else - false - end - end - - def expect(type) - tok = peek() - if not accept(type) - error("expected '#{type}', received '#{tok.type}'") - end - tok - end - - def consume() - expect(peek().type) - end - - def eof? - (peek().type == :eof) - end - - def location() - if @prev - [@prev.file, @prev.pos] - else - [@lex.file, 0] - end - end - - OPERATORS = { - "+" => true, - "-" => true, - "*" => true, - "/" => true, - "%" => true, - "<" => true, - ">" => true, - "<=" => true, - ">=" => true, - "==" => true, - "!=" => true, - "&&" => true, - "||" => true, - } - - def operator? - OPERATORS[peek().type] - end - - def toplevel - expect :module - @module = expect(:ident).text.to_sym - imports - exports - while !matches(:eof) - mod_def = toplevel_defintion() - if mod_def.value.is_a? IR::Const - kind = :const - elsif mod_def.value.is_a? IR::Var - kind = :var - elsif mod_def.value.is_a? IR::Func - kind = :func -# elsif mod_def.value.is_a? IR::Type -# kind = :type - else - raise "invalid toplevel definition #{mod_def.value.class}" - end - - syms.add_sym( - mod_def.name.name, - mod_def.name.loc, - kind, - mod_def.type, - mod_def.value - ) - end - end - - def imports - # TBD - end - - def exports - # TBD - end - - def toplevel_defintion - ident = identifier() - if matches("(") then - val = function_definition(ident) - elsif matches("=") then - val = constant_definition(ident) - else - error("#{ident.name} is not a valid toplevel definition") - end - IR::Def.new(ident.loc, nil, ident, val) - end - - def function_definition(name) - args = function_arglist() - func = IR::Func.new(name.loc, :void, args, [], []) - defs = [] -# if accept(":") -# func.type = expect(:ident).text.to_sym -# end - expect("{") - while matches(:def) - local = variable_definition() - func.locals << local.name.name - defs << local; - end - stmts = statement_list(["}", :return]) - stmts << return_statement - func.body = defs + stmts - expect("}"); - func - end - - def constant_definition(name) - error("constant definitions not yet supported") - end - - def function_arglist() - args = [] - expect("(") - while !matches(")") - id = identifier() -# expect(":") -# id.type = expect(:ident).text.to_sym - args << id - expect(",") if !matches(")") - end - expect(")") - args - end - - def identifier() - name = expect(:ident) - IR::Var.new(name.pos, nil, name.text.to_sym) - end - - - ## - # Statements - ## - def statement_list(terminators) - stmts = [] - while !matches_any(terminators) - stmts << statement - end - stmts - end - - def statement() - if matches(:set) - variable_assignment() - elsif matches(:assert) - assert_statement() - elsif matches(:if) - if_statement() - else - expression() - end - end - - def return_statement() - loc = location - if accept(:return) - IR::Return.new(loc, nil, expression()) - else - IR::Return.new(loc, nil, nil) - end - end - - def variable_definition() - loc = location - expect(:def) - name = identifier() - expect ("=") - value = expression() - IR::Def.new(loc, nil, name, value) - end - - def variable_assignment() - loc = location - expect(:set) - name = identifier() - if matches("[") - name = array_or_hash_access(name) - end - expect ("=") - value = expression() - IR::Set.new(loc, nil, name, value) - end - - def assert_statement() - loc = location - expect(:assert); - IR::Assert.new(loc, nil, expression()) - end - - def if_statement() - loc = location - expect(:if) - cond = expression() - then_branch = statement_block() - if accept(:else) - if matches(:if) - else_branch = [ if_statement() ] - else - else_branch = statement_block() - end - end - IR::If.new(loc, nil, cond, then_branch, else_branch) - end - - def statement_block() - loc = location - stmts = [] - expect("{") - while !matches("}") - stmts << statement() - end - expect("}") - return stmts - end - - - ## - # Expressions - ## - - def expression() - left = logical_expr() - if matches_any(["&&", "||"]) - loc = location - op = consume() - right = logical_expr() - left = make_binop(loc, op.text, left, right) - end - left - end - - - def logical_expr() - left = simple_expr() - if matches_any(["==", "!=", "<", "<=", ">", ">="]) - loc = location - op = consume() - right = simple_expr() - left = make_binop(loc, op.text, left, right) - end - left - end - - def simple_expr() - # Handle unary ops first - if matches_any(["+", "-", :not]) - loc = location - op = consume() - left = term() - left = make_unop(location, op.text, left, nil) - else - left = term() - end - - # now look for binary ops - while matches_any(["+", "-", :or]) - loc = location - op = consume() - right = term() - left = make_binop(loc, op.text, left, right) - end - left - end - - def term() - left = factor() - while matches_any(["*", "/", "%", :and]) - loc = location - op = consume() - right = factor(); - left = make_binop(loc, op.text, left, right) - end - left - end - - def make_binop(loc, op, left, right) - node = IR::Op.new(loc, nil, op, left, right) - node - end - - def make_unop(loc, op, value) - node = IR::Op.new(loc, nil, op, value, nil) - node - end - - def factor() - if accept("(") - expr = expression() - expect(")") - else - expr = const_or_ident() - end - - # Check for function call - if matches("(") - expr = func_call(expr) - elsif matches("[") - expr = array_or_hash_access(expr) - end - expr - end - - def func_call(expr) - call = IR::Call.new(location, nil, expr, []) - expect("(") - while !matches(")") - call.args << expression() - expect(",") if !matches(")") - end - expect(")") - call - end - - def array_or_hash_access(expr) - expect("[") - loc = location - key = expression() - expect("]") - make_binop(loc, "[", expr, key) - end - - def const_or_ident() - if matches("[") - array_literal() - elsif matches("{") - hash_literal() - else - tok = consume() - if tok.type == :bool - IR::Const.new(tok.pos, Value::Bool, tok.text == "true") - elsif tok.type == :string - IR::Const.new(tok.pos, Value::String, tok.text) - elsif tok.type == :int - IR::Const.new(tok.pos, Value::Int, tok.text.to_i) - elsif tok.type == :float - IR::Const.new(tok.pos, Value::Float, tok.text.to_f) - elsif tok.type == :void - IR::Const.new(tok.pos, Value::Void, :void) - elsif tok.type == :ident - IR::Var.new(tok.pos, nil, tok.text.to_sym) - else - error("invalid constant #{tok}") - end - end - end - - def array_literal() - exprs = [] - expect("[") - loc = location() - while !matches("]") - exprs << expression() - expect(",") if !matches("]") - end - expect("]") - IR::Const.new(loc, nil, exprs) - end - - def hash_literal() - dict = {} - expect("{") - loc = location() - while !matches("}") - key = string_or_ident() - expect(":") - val = expression() - dict[key] = val - expect(",") if !matches("}") - end - expect("}") - IR::Const.new(loc, nil, dict) - end - - def string_or_ident() - tok = consume() - if tok.type == :string - IR::Const.new(tok.pos, Value::String, tok.text) - elsif tok.type == :ident - IR::Const.new(tok.pos, Value::String, "\"#{tok.text}\"") - else - error("not a string or identifier: #{tok}") - end - end -end - - -## -# Symbol Table Definition -## - -class SymTable - Symbol = Struct.new(:name, :loc, :kind, :type, :value) - - def initialize() - @scopes = [{}, {}] - end - - def builtin?(name) - (not local? name) and (not (@scopes[0] || {})[name].nil?) - end - - def global?(name) - (not local? name) and (not (@scopes[1] || {})[name].nil?) - end - - def parameter?(name) - (not local? name) and (not (@scopes[2] || {})[name].nil?) - end - - def local?(name) - find_sym((@scopes[3..-1] || []).flatten, name) - end - - def find_sym(scopes, name) - scopes.reverse_each.detect do |scope| - return scope[name] if scope[name] - end - nil - end - - def open_scope() - @scopes.push({}) - end - - def close_scope() - @scopes.pop - end - - def reset_scope() - @scopes = @scopes[0..1] - end - - def [](name) - find_sym(@scopes, name) - end - - def add_builtin(name, kind, type, value) - @scopes[0][name] = - Symbol.new(name, 0, kind, type, value) - end - - def add_sym(name, loc, kind, type, value) - @scopes.last[name] = - Symbol.new(name, loc, kind, type, value) - end - - def each(&block) - @scopes.last.each(&block) - end - - def globals - @scopes[1] - end -end - - -## -# Type Checking/Inference Module -## - -class TypeChecker - BaseToTypes = { - :bool => Value::Bool, - :int => Value::Int, - :float => Value::Float, -# :string => Value::String, - } - - TypesToBase = { - Value::Bool => :bool, - Value::Int => :int, - Value::Float => :float, - } - - UnaryOps = { - "+" => { - int: [:int, :int], - float: [:float, :float], - }, - "-" => { - int: [:int, :int], - float: [:float, :float], - }, - "!" => { - bool: [:bool, :bool] - }, - } - - BinaryOps = { - "+" => { - int: [:int, :int, :int], - float: [:float, :float, :float], - }, - "-" => { - int: [:int, :int, :int], - float: [:float, :float, :float], - }, - "*" => { - int: [:int, :int, :int], - float: [:float, :float, :float], - }, - "/" => { - int: [:int, :int, :int], - float: [:float, :float, :float], - }, - "%" => { - int: [:int, :int, :int], -# Float modulo normally implemented as library function -# float: [:float, :float, :float], - }, - "<" => { - int: [:int, :int, :bool], - float: [:float, :float, :bool], - }, - ">" => { - int: [:int, :int, :bool], - float: [:float, :float, :bool], - }, - "<=" => { - int: [:int, :int, :bool], - float: [:float, :float, :bool], - }, - ">=" => { - int: [:int, :int, :bool], - float: [:float, :float, :bool], - bool: [:bool, :bool, :bool], - }, - "==" => { - int: [:int, :int, :bool], - float: [:float, :float, :bool], - bool: [:bool, :bool, :bool], - string: [:string, :string, :bool], - }, - "!=" => { - int: [:int, :int, :bool], - float: [:float, :float, :bool], - bool: [:bool, :bool, :bool], - string: [:string, :string, :bool], - }, - "&&" => { bool: [:bool, :bool, :bool] }, - "||" => { bool: [:bool, :bool, :bool] }, - "<<" => { int: [:int, :int, :int] }, - ">>" => { int: [:int, :int, :int] }, - "&" => { int: [:int, :int, :int] }, - "^" => { int: [:int, :int, :int] }, - "|" => { int: [:int, :int, :int] }, - } - - def initialize(parser) - @parser = parser - end - - def error(loc, msg) - @parser.error(msg, loc) - end - - def check(env, expr, type) - if expr.is_a? IR::Const - check_const(env, expr, type) - elsif (expr.is_a? IR::Op) - check_op(env, expr, type) - elsif expr.is_a? IR::Var - check_var(env, expr, type) -# elsif expr.is_a? IR::Let -# check_let(env, expr, type) -# elsif expr.is_a? IR::If -# check_ifexpr(env, expr, type) -# elsif expr.is_a? IR::Set -# check_set(env, expr, type) -# elsif expr.is_a? IR::Func -# check_func(env, expr, type) -# elsif expr.is_a? IR::Apply -# check_apply(env, expr, type) - else - etype = infer(env, expr) - if type != etype - error(expr.loc, "expected #{type}, received #{etype}") - end - end - expr.type = type - end - - def infer(env, expr) - if expr.is_a? IR::Func - infer_func(env, expr) - elsif expr.is_a? IR::Return - infer_return(env, expr) - elsif expr.is_a? IR::Const - infer_const(env, expr) - elsif expr.is_a? IR::Var - infer_var(env, expr) - elsif expr.is_a? IR::Def - infer_def(env, expr) - elsif expr.is_a? IR::Set - infer_set(env, expr) - elsif expr.is_a? IR::Op - infer_op(env, expr) - elsif expr.is_a? IR::Call - infer_call(env, expr) - elsif expr.is_a? IR::If - infer_if(env, expr) - else - pp expr - error(expr.loc, "unable to determine type of expression") - end - end - - private - - ## - # TYPE INFERENCE - ## - - def infer_func(env, expr) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def infer_return(env, expr) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def infer_const(env, expr) - expr.type - end - - def infer_var(env, expr) - if env[expr.name].nil? - error(expr.loc, "symbol '#{expr.name}' not defined") - end - expr.type = env[expr.name][:type] - end - - def infer_def(env, expr) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def infer_set(env, expr) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def infer_op(env, expr) - # infer the operand type first - vtype = infer(env, expr.left) - if (expr.left and expr.right) - check_binary(env, expr, vtype) - else - check_unary(env, expr, vtype) - end - end - - def infer_call(env, expr) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def infer_if(env, expr) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - ## - # TYPE CHECKING - ## - - - def check_func(env, expr, type) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def check_return(env, expr, type) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def check_const(env, expr, type) - raise "unimplemented" - error(expr.loc, "unimplemented") -# expr, type.type - end - - def check_var(env, expr, type) - etype = infer(env, expr) - if (etype.class == String) - expr.type = type - env.set_type(expr.name, type) - elsif expr.type != type - error(expr.loc, "expected #{type}, received #{etype}") - end - type - end - - def check_def(env, expr, type) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def check_set(env, expr, type) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def check_op(env, expr, type) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def check_call(env, expr, type) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - def check_if(env, expr, type) - raise "unimplemented" - error(expr.loc, "unimplemented") - end - - - -# def make_typevar() -# @typevar ||= 0 -# var = "abcdefghijklmnopqrstuvwxyz"[@typevar] -# @typevar += 1 -# var -# end -# -# def var?(expr) -# expr.class == IR::Var -# end -# -# def untyped_global_func?(env, func, type) -# type.nil? and -# var?(func) and -# env.global?(func.name) and -# env[func.name][:value] -# end -# -# def check_apply(env, expr, type) -# # Handle global functions that haven't been typed yet but are -# # being called. We pause to infer their type. -# if untyped_global_func?(env, expr.func, type) -# value = env[expr.func.name][:value] -# env[expr.func.name][:value] = nil -# infer(@parser.syms, value) -# type = infer(env, expr.func) -# end -# -# error(expr.loc, "object being applied is not a function (has type: #{type.to_s})") if not type.is_a? Array -# error(expr.loc, "wrong number of arguments to function call") if (type.length - 1) != expr.args.length -# type[0..-2].each_with_index do |t,i| -# check(env, expr.args[i], t) -# end -# expr.type = type.last -# end -# -# def check_ifexpr(env, expr, type) -# check(env, expr.cond, :bool) -# check(env, expr.then, type) -# check(env, expr.else, type) -# end -# -# -# def verify(cond, loc, msg) -# error(loc, msg) if not cond -# end -# -# def check_func(env, expr, type) -# env = env.clone -# verify((type.length-1) == expr.args.length, expr.loc, -# "incorrect number of arguments (#{expr.args.length}, expected #{(type.length-1)}") -# expr.args.each_with_index do |a,i| -# env.add_sym(a.name, a.loc, :arg, type[i]) -# end -# check(env, expr.expr, type.last) -# end -# -# def check_let(env, let, type) -# env = env.clone -# env.add_sym(let.var.name, let.var.loc, :var, let.var.type) -# check(env, let.expr, type) -# end - -# -# def infer_let(env, let) -# if let.body.nil? -# infer_decl(env, let) -# else -# infer_let_expr(env, let) -# end -# end -# -# def infer_decl(env, let) -# env = env.clone -# # handle the binding -# if let.var.type -# check(env, let.expr, let.var.type) -# else -# let.var.type = infer(env, let.expr) -# end -# env.set_type(let.var.name, let.var.type) -# env[let.var.name][:value] = nil -# let.type = :void -# end -# -# def infer_let_expr(env, let) -# env = env.clone -# -# # handle the binding -# if let.var.type -# check(env, let.expr, let.var.type) -# else -# let.var.type = infer(env, let.expr) -# end -# -# env.add_sym(let.var.name, let.var.loc, :var, let.var.type) -# let.type = infer(env, let.body) -# end -# -# def infer_ifexpr(env, expr) -# check(env, expr, infer(env, expr.then)) -# end -# -# def infer_set(env, expr) -# error(expr.loc, "infer_set unimplemented") -# end -# -# def infer_func(env, expr) -# env = env.clone -# @typevar = 0 -# expr.args.each do |a| -# a.type ||= make_typevar() -# env.add_sym(a.name, a.loc, :arg, a.type) -# end -# infer(env, expr.expr) -# type = (expr.args + [expr.expr]).map {|v| v.type } -# type.unshift(:void) if type.length == 1 -# -# # the body may have inferred an arg type, fix it up here -# expr.args.each_with_index do |a,i| -# a.type = env[a.name][:type] -# type[i] = a.type -# end -# expr.type = type -# end -# -# def infer_apply(env, expr) -# if expr.func.is_a? String -# expr.type = infer_opcall(env, expr) -# else -# type = infer(env, expr.func) -# check_apply(env, expr, type) -# end -# end -# -# def assign_type(env, var, type) -# if var.class == IR::Var and (var.type.nil? or var.type == String) then -# var.type = type -# env[var.name][:type] = type -# end -# end - - - def check_unary(env, expr, vtype) - optype = UnaryOps[expr.op][TypesToBase[vtype]] - error(expr.loc, "unknown unary operation '#{expr.op}' for operand type #{vtype}") if not optype - optype = optype.map{|v| BaseToTypes[v] } - check(env, expr.left, optype[0]) - expr.type = optype[-1] - end - - def check_binary(env, expr, vtype) - pp expr - optype = BinaryOps[expr.op][TypesToBase[vtype]] - error(expr.loc, "unknown binary operation '#{expr.op}' for operand type #{vtype}") if optype.nil? - optype = optype.map{|v| BaseToTypes[v] } - check(env, expr.left, optype[0]) - check(env, expr.right, optype[1]) - expr.type = optype[-1] - end -end - - -## -# Code Generation Module -## - -class Codegen - State = Struct.new(:parser, :label, :locals, :syms, :temp, :indent) - - def initialize(parser) - @parser = parser - @label = 0 - @locals = [] - @syms = @parser.syms - @temp = 0 - @indent = 1 - @output = $stdout -# @state = State.new(parser, 0, [], parser.syms, 0, 1) - end - - def output(outpath) - @output = File.open(outpath, "wb") - - puts "#include \n\n" - @syms.each do |name, val| - args = val.value.args.map{|e| "Value #{e.name}" }.join(", ") - puts "Value #{symname(val)}(#{args});" - end - puts "" - - @syms.each do |name, val| - args = val.value.args.map{|e| "Value #{e.name}" }.join(", ") - puts "Value #{symname(val)}(#{args}) {" - @syms.open_scope - @locals = val.value.locals - val.value.args.each_with_index do |name, idx| - @syms.add_sym(name.name, name.loc, :param, name.type, idx) - end - val.value.body.each do |stmnt| - emit(stmnt) - end - puts "}\n\n" - @syms.close_scope - end - - @output.close - end - - private - - def emit(v) - if v.is_a? IR::Return then - emit_return(v) - elsif v.is_a? IR::Const then - emit_const(v) - elsif v.is_a? IR::Op then - emit_binop(v) - elsif v.is_a? IR::Var then - emit_var(v) - elsif v.is_a? IR::Call then - emit_call(v) - elsif v.is_a? IR::Def then - emit_def(v) - elsif v.is_a? IR::Set then - emit_set(v) - elsif v.is_a? IR::Assert then - emit_assert(v) - elsif v.is_a? IR::If then - emit_if(v) - else - raise "code emitting of #{v.class} not implemented" - end - end - - def emit_assert(v) - temp = emit(v.value) - linenum = @parser.linenum(v.loc[1]) - putln "Assert(#{v.loc[0].inspect}, #{linenum}, #{temp});" - end - - def emit_const(v) - var = mktemp() - if v.value.is_a? Integer - putln "Value #{var} = MakeInt(#{v.value});" - elsif v.value.is_a? Float - putln "Value #{var} = MakeReal(#{v.value});" - elsif v.value == true || v.value == false - putln "Value #{var} = MakeBool(#{v.value});" - elsif v.value.is_a? String - putln "Value #{var} = MakeString(#{v.value});" - elsif v.value.is_a? Array - putln "Value #{var} = MakeArray(#{v.value.length});" - v.value.each_with_index do |e, i| - val = emit(e) - putln "Array_Set(#{var}, #{i}, #{val});" - end - elsif v.value.is_a? Hash - putln "Value #{var} = MakeDict(#{v.value.length});" - v.value.each do |k, v| - val = emit(v) - putln "Dict_Set(#{var}, MakeString(#{k.value}), #{val});" - end - else - raise "code emitting constants of this type not implemented" - end - var - end - - def emit_return(v) - if v.value - temp = emit(v.value) - putln "return #{temp};" - else - putln "return MakeNil();" - end - end - - def emit_binop(v) - if v.op == "&&" - lvar = emit(v.left) - result = mktemp(); - putln "Value #{result} = #{lvar};" - putln "if (IsTrue(#{result})) {" - @indent += 1 - rvar = emit(v.right) - putln "#{result} = #{rvar};" - @indent -= 1 - putln "} else {" - putln " #{result} = MakeBool(false);" - putln "}" - elsif v.op == "||" - lvar = emit(v.left) - result = mktemp(); - putln "Value #{result} = #{lvar};" - putln "if (IsFalse(#{result})) {" - @indent += 1 - rvar = emit(v.right) - putln "#{result} = #{rvar};" - @indent -= 1 - putln "}" - elsif v.op == "[" - lvar = emit(v.left) - rvar = emit(v.right) - result = mktemp(); - putln "Value #{result} = Object_Get(#{lvar}, #{rvar});" - else - lvar = emit(v.left) - rvar = emit(v.right) - result = mktemp(); - case v.op - when "+" - putln "Value #{result} = OpAdd(#{lvar}, #{rvar});" - when "-" - putln "Value #{result} = OpSub(#{lvar}, #{rvar});" - when "*" - putln "Value #{result} = OpMul(#{lvar}, #{rvar});" - when "/" - putln "Value #{result} = OpDiv(#{lvar}, #{rvar});" - when "%" - putln "Value #{result} = OpMod(#{lvar}, #{rvar});" - when "<" - putln "Value #{result} = OpLt(#{lvar}, #{rvar});" - when "<=" - putln "Value #{result} = OpLtEq(#{lvar}, #{rvar});" - when ">" - putln "Value #{result} = OpGt(#{lvar}, #{rvar});" - when ">=" - putln "Value #{result} = OpGtEq(#{lvar}, #{rvar});" - when "==" - putln "Value #{result} = OpEq(#{lvar}, #{rvar});" - when "!=" - putln "Value #{result} = OpNeq(#{lvar}, #{rvar});" - else - raise "not implemented" - end - end - result - end - - def emit_call(v) - result = mktemp() - vars = v.args.reverse.map do |arg| - emit(arg) - end.join(", ") - func = lookup_func(v.func) - if (func.is_a? SymTable::Symbol) then - error v, "symbol '#{func.name}' is not a function" if (func.kind != :func) - putln "Value #{result} = #{symname(func)}(#{vars});" - else - error v, "expression result is not a function" - end - - result - end - - def emit_def(v) - @syms.add_sym(v.name.name, v.loc, :local, v.type, v.value) - temp = emit(v.value) - error v, "invalid local definition: #{v.name.name}" if not @locals.index(v.name.name) - putln "Value #{v.name.name} = #{temp};" - end - - def emit_set(v) - if v.name.is_a? IR::Op - error v, "not a valid array or hash access expression" if v.name.op != "[" - object = emit(v.name.left) - key = emit(v.name.right) - value = emit(v.value) - putln "Object_Set(#{object}, #{key}, #{value});" - else - temp = emit(v.value) - error v, "invalid local definition: #{v.name.name}" if not @locals.index(v.name.name) - putln "#{v.name.name} = #{temp};" - end - end - - def emit_var(v) - var = @syms[v.name] - if var.nil? - error v, "no such variable: #{v.name}" - end - var.name - end - - def emit_if(v) - cond = emit(v.cond) - putln "if (ValueAsBool(#{cond})) {" - emit_block(v.then) - putln "} else {" - emit_block(v.else) - putln "}" - end - - def emit_block(v) - return if v.nil? - @indent += 1 - v.each do |v| - emit(v); - end - @indent -= 1 - end - - def lookup_func(func) - if func.is_a? IR::Var then - value = @syms[func.name] - error func, "no such function: #{func.name}" if value.nil? - else - value = func - end - value - end - - def error(expr, msg) - file = @parser.path - line = @parser.linenum(expr.loc) - $stderr.puts "#{file}:#{line}:error: #{msg}" - exit 1 - end - - def puts(msg) - @output.puts msg - end - - def putln(msg) - @output.puts (" " * @indent) + msg - end - - def mktemp() - name = "_t#{@temp}" - @temp += 1 - name - end - - def genlabel() - state.label = state.label + 1 - end - - def symname(sym) - if @syms.builtin? sym.name - "#{sym.name}" - else - "#{@parser.module}_#{sym.name}" - end - end -end - - -## -# Parse and Analyze -## - -def compile(path) - out_path = path.sub(/\.[^.]+$/,'.c') - parser = Parser.new(path) - checker = TypeChecker.new(parser) - codegen = Codegen.new(parser) - codegen.output(out_path) - `gcc -c -I. -O1 -o cerise-c.o #{out_path}` - parser.symbols -end - -#$parser = Parser.new("cerise-c.m") -#$checker = TypeChecker.new($parser) -#$codegen = Codegen.new($parser) -#$codegen.output("cerise-c.c") -#`gcc -c -I. -O1 -o cerise-c.o #{"cerise-c.c"}` - -#pp $parser - -pp compile("cerise-c.m") \ No newline at end of file +pp Compiler.compile("cerise-c.m") \ No newline at end of file diff --git a/lib/codegen.rb b/lib/codegen.rb new file mode 100644 index 0000000..5c20513 --- /dev/null +++ b/lib/codegen.rb @@ -0,0 +1,282 @@ +## +# Code Generation Module +## +class Codegen + State = Struct.new(:parser, :label, :locals, :syms, :temp, :indent) + + def initialize(parser) + @parser = parser + @label = 0 + @locals = [] + @syms = @parser.syms + @temp = 0 + @indent = 1 + @output = $stdout +# @state = State.new(parser, 0, [], parser.syms, 0, 1) + end + + def output(outpath) + @output = File.open(outpath, "wb") + + puts "#include \n\n" + @syms.each do |name, val| + args = val.value.args.map{|e| "Value #{e.name}" }.join(", ") + puts "Value #{symname(val)}(#{args});" + end + puts "" + + @syms.each do |name, val| + args = val.value.args.map{|e| "Value #{e.name}" }.join(", ") + puts "Value #{symname(val)}(#{args}) {" + @syms.open_scope + @locals = val.value.locals + val.value.args.each_with_index do |name, idx| + @syms.add_sym(name.name, name.loc, :param, name.type, idx) + end + val.value.body.each do |stmnt| + emit(stmnt) + end + puts "}\n\n" + @syms.close_scope + end + + @output.close + end + + private + + def emit(v) + if v.is_a? IR::Return then + emit_return(v) + elsif v.is_a? IR::Const then + emit_const(v) + elsif v.is_a? IR::Op then + emit_binop(v) + elsif v.is_a? IR::Var then + emit_var(v) + elsif v.is_a? IR::Call then + emit_call(v) + elsif v.is_a? IR::Def then + emit_def(v) + elsif v.is_a? IR::Set then + emit_set(v) + elsif v.is_a? IR::Assert then + emit_assert(v) + elsif v.is_a? IR::If then + emit_if(v) + else + raise "code emitting of #{v.class} not implemented" + end + end + + def emit_assert(v) + temp = emit(v.value) + linenum = @parser.linenum(v.loc[1]) + putln "Assert(#{v.loc[0].inspect}, #{linenum}, #{temp});" + end + + def emit_const(v) + var = mktemp() + if v.value.is_a? Integer + putln "Value #{var} = MakeInt(#{v.value});" + elsif v.value.is_a? Float + putln "Value #{var} = MakeReal(#{v.value});" + elsif v.value == true || v.value == false + putln "Value #{var} = MakeBool(#{v.value});" + elsif v.value.is_a? String + putln "Value #{var} = MakeString(#{v.value});" + elsif v.value.is_a? Array + putln "Value #{var} = MakeArray(#{v.value.length});" + v.value.each_with_index do |e, i| + val = emit(e) + putln "Array_Set(#{var}, #{i}, #{val});" + end + elsif v.value.is_a? Hash + putln "Value #{var} = MakeDict(#{v.value.length});" + v.value.each do |k, v| + val = emit(v) + putln "Dict_Set(#{var}, MakeString(#{k.value}), #{val});" + end + else + raise "code emitting constants of this type not implemented" + end + var + end + + def emit_return(v) + if v.value + temp = emit(v.value) + putln "return #{temp};" + else + putln "return MakeNil();" + end + end + + def emit_binop(v) + if v.op == "&&" + lvar = emit(v.left) + result = mktemp(); + putln "Value #{result} = #{lvar};" + putln "if (IsTrue(#{result})) {" + @indent += 1 + rvar = emit(v.right) + putln "#{result} = #{rvar};" + @indent -= 1 + putln "} else {" + putln " #{result} = MakeBool(false);" + putln "}" + elsif v.op == "||" + lvar = emit(v.left) + result = mktemp(); + putln "Value #{result} = #{lvar};" + putln "if (IsFalse(#{result})) {" + @indent += 1 + rvar = emit(v.right) + putln "#{result} = #{rvar};" + @indent -= 1 + putln "}" + elsif v.op == "[" + lvar = emit(v.left) + rvar = emit(v.right) + result = mktemp(); + putln "Value #{result} = Object_Get(#{lvar}, #{rvar});" + else + lvar = emit(v.left) + rvar = emit(v.right) + result = mktemp(); + case v.op + when "+" + putln "Value #{result} = OpAdd(#{lvar}, #{rvar});" + when "-" + putln "Value #{result} = OpSub(#{lvar}, #{rvar});" + when "*" + putln "Value #{result} = OpMul(#{lvar}, #{rvar});" + when "/" + putln "Value #{result} = OpDiv(#{lvar}, #{rvar});" + when "%" + putln "Value #{result} = OpMod(#{lvar}, #{rvar});" + when "<" + putln "Value #{result} = OpLt(#{lvar}, #{rvar});" + when "<=" + putln "Value #{result} = OpLtEq(#{lvar}, #{rvar});" + when ">" + putln "Value #{result} = OpGt(#{lvar}, #{rvar});" + when ">=" + putln "Value #{result} = OpGtEq(#{lvar}, #{rvar});" + when "==" + putln "Value #{result} = OpEq(#{lvar}, #{rvar});" + when "!=" + putln "Value #{result} = OpNeq(#{lvar}, #{rvar});" + else + raise "not implemented" + end + end + result + end + + def emit_call(v) + result = mktemp() + vars = v.args.reverse.map do |arg| + emit(arg) + end.join(", ") + func = lookup_func(v.func) + if (func.is_a? SymTable::Symbol) then + error v, "symbol '#{func.name}' is not a function" if (func.kind != :func) + putln "Value #{result} = #{symname(func)}(#{vars});" + else + error v, "expression result is not a function" + end + + result + end + + def emit_def(v) + @syms.add_sym(v.name.name, v.loc, :local, v.type, v.value) + temp = emit(v.value) + error v, "invalid local definition: #{v.name.name}" if not @locals.index(v.name.name) + putln "Value #{v.name.name} = #{temp};" + end + + def emit_set(v) + if v.name.is_a? IR::Op + error v, "not a valid array or hash access expression" if v.name.op != "[" + object = emit(v.name.left) + key = emit(v.name.right) + value = emit(v.value) + putln "Object_Set(#{object}, #{key}, #{value});" + else + temp = emit(v.value) + error v, "invalid local definition: #{v.name.name}" if not @locals.index(v.name.name) + putln "#{v.name.name} = #{temp};" + end + end + + def emit_var(v) + var = @syms[v.name] + if var.nil? + error v, "no such variable: #{v.name}" + end + var.name + end + + def emit_if(v) + cond = emit(v.cond) + putln "if (ValueAsBool(#{cond})) {" + emit_block(v.then) + putln "} else {" + emit_block(v.else) + putln "}" + end + + def emit_block(v) + return if v.nil? + @indent += 1 + v.each do |v| + emit(v); + end + @indent -= 1 + end + + def lookup_func(func) + if func.is_a? IR::Var then + value = @syms[func.name] + error func, "no such function: #{func.name}" if value.nil? + else + value = func + end + value + end + + def error(expr, msg) + file = @parser.path + line = @parser.linenum(expr.loc) + $stderr.puts "#{file}:#{line}:error: #{msg}" + exit 1 + end + + def puts(msg) + @output.puts msg + end + + def putln(msg) + @output.puts (" " * @indent) + msg + end + + def mktemp() + name = "_t#{@temp}" + @temp += 1 + name + end + + def genlabel() + state.label = state.label + 1 + end + + def symname(sym) + if @syms.builtin? sym.name + "#{sym.name}" + else + "#{@parser.module}_#{sym.name}" + end + end +end diff --git a/lib/compiler.rb b/lib/compiler.rb new file mode 100644 index 0000000..d7d1120 --- /dev/null +++ b/lib/compiler.rb @@ -0,0 +1,11 @@ +module Compiler + def self.compile(path) + out_path = path.sub(/\.[^.]+$/,'.c') + parser = Parser.new(path) + checker = TypeChecker.new(parser) + codegen = Codegen.new(parser) + codegen.output(out_path) + `gcc -c -I. -O1 -o cerise-c.o #{out_path}` + parser.symbols + end +end \ No newline at end of file diff --git a/lib/ir.rb b/lib/ir.rb new file mode 100644 index 0000000..23b5044 --- /dev/null +++ b/lib/ir.rb @@ -0,0 +1,15 @@ +## +# Intermediate Representation Module +## +module IR + Func = Struct.new(:loc, :type, :args, :locals, :body) + Return = Struct.new(:loc, :type, :value) + Assert = Struct.new(:loc, :type, :value) + Const = Struct.new(:loc, :type, :value) + Var = Struct.new(:loc, :type, :name) + Def = Struct.new(:loc, :type, :name, :value) + Set = Struct.new(:loc, :type, :name, :value) + Op = Struct.new(:loc, :type, :op, :left, :right) + Call = Struct.new(:loc, :type, :func, :args) + If = Struct.new(:loc, :type, :cond, :then, :else) +end diff --git a/lib/lexer.rb b/lib/lexer.rb new file mode 100644 index 0000000..dba3890 --- /dev/null +++ b/lib/lexer.rb @@ -0,0 +1,71 @@ +## +# Lexical Analyzer Definition +## +class Lexer + Tok = Struct.new(:text, :file, :pos, :type) + + attr_accessor :file + attr_accessor :data + + def initialize(path = nil) + @file = (path || "") + @text = (path ? File.read(path) : "") + @data = StringScanner.new(@text) + end + + def parse_string(str) + @file = "" + @text = str + @data = StringScanner.new(@text) + end + + def linenum(pos = nil) + pos = [pos || @data.pos].flatten.last + @text[0..pos].count("\n") + 1 + end + + SPACE = /([ \t\v\n\r]+|#.*\n)/ + IDENT = /[_a-zA-Z][_a-zA-Z0-9]*/ + BRACES = /[\(\)\[\]\{\}\.]/ + OPERATORS = /[:,<>*\/=+\-\$?!%&|]+/ + INTEGER = /[0-9]+/ + FLOATING = /[0-9]+\.[0-9]+/ + STRING = /"(\\"|[^"])*"/ + ID_TYPES = { + "true" => :bool, + "false" => :bool, + "if" => :if, + "else" => :else, + "def" => :def, + "set" => :set, + "return" => :return, + "assert" => :assert, + "module" => :module + } + + def next + while @data.skip(SPACE) do + end + if not @data.eos? + type = :eof + if @data.scan(IDENT) + type = (ID_TYPES[@data.matched] || :ident) + elsif @data.scan(FLOATING) + type = :float + elsif @data.scan(INTEGER) + type = :int + elsif @data.scan(STRING) + type = :string + elsif @data.scan(BRACES) + type = @data.matched + elsif @data.scan(OPERATORS) + type = @data.matched + end + tok = Tok.new(@data.matched, @file, @data.pos, type) if type + else + tok = Tok.new("EOF", @file, @data.pos, :eof) + end +# pp tok + tok + end +end diff --git a/lib/parser.rb b/lib/parser.rb new file mode 100644 index 0000000..9a1f48a --- /dev/null +++ b/lib/parser.rb @@ -0,0 +1,465 @@ +## +# Code Parsing Module +## +class Parser + attr_accessor :syms + attr_accessor :checker + attr_reader :path + attr_reader :module + + def initialize(path = nil) + @path = path + @syms = SymTable.new + @checker = TypeChecker.new(self) + + syms.add_builtin(:Length, :func, [:any, :int], nil) + + +# syms.add_builtin(:void, 0, :type, :void) + syms.add_builtin(:bool, 0, :type, :bool) +# syms.add_builtin(:int, 0, :type, :int) +# syms.add_builtin(:string, 0, :type, :string) +# syms.add_builtin(:float, 0, :type, :float) + +# syms.add_builtin(:Error, :func, [:any, :void], nil) + + parse_file(path) + end + + def symbols() + @syms.globals.map do |k,v| + [k, {kind: v.kind, type: v.type}] + end.to_h + end + + def linenum(pos) + @lex.linenum(pos) + end + + def parse_file(path) + @lex = Lexer.new(path) + @prev = nil + @next = nil + toplevel + end + + def error(str, loc = nil) + file, pos = (loc ? [@lex.file, loc] : [@lex.file, (@next || @prev).pos]) + $stderr.puts "#{file}:#{@lex.linenum(pos)}: #{str}" + exit 1 + end + + def peek() + @next = @lex.next if @next.nil? + @next + end + + def matches(type) + (peek().type == type) + end + + def matches_any(types) + types.any? {|type| peek().type == type } + end + + def accept(type) + if (matches(type)) + @prev = @next + @next = nil + true + else + false + end + end + + def expect(type) + tok = peek() + if not accept(type) + error("expected '#{type}', received '#{tok.type}'") + end + tok + end + + def consume() + expect(peek().type) + end + + def eof? + (peek().type == :eof) + end + + def location() + if @prev + [@prev.file, @prev.pos] + else + [@lex.file, 0] + end + end + + OPERATORS = { + "+" => true, + "-" => true, + "*" => true, + "/" => true, + "%" => true, + "<" => true, + ">" => true, + "<=" => true, + ">=" => true, + "==" => true, + "!=" => true, + "&&" => true, + "||" => true, + } + + def operator? + OPERATORS[peek().type] + end + + def toplevel + expect :module + @module = expect(:ident).text.to_sym + imports + exports + while !matches(:eof) + mod_def = toplevel_defintion() + if mod_def.value.is_a? IR::Const + kind = :const + elsif mod_def.value.is_a? IR::Var + kind = :var + elsif mod_def.value.is_a? IR::Func + kind = :func +# elsif mod_def.value.is_a? IR::Type +# kind = :type + else + raise "invalid toplevel definition #{mod_def.value.class}" + end + + syms.add_sym( + mod_def.name.name, + mod_def.name.loc, + kind, + mod_def.type, + mod_def.value + ) + end + end + + def imports + # TBD + end + + def exports + # TBD + end + + def toplevel_defintion + ident = identifier() + if matches("(") then + val = function_definition(ident) + elsif matches("=") then + val = constant_definition(ident) + else + error("#{ident.name} is not a valid toplevel definition") + end + IR::Def.new(ident.loc, nil, ident, val) + end + + def function_definition(name) + args = function_arglist() + func = IR::Func.new(name.loc, :void, args, [], []) + defs = [] +# if accept(":") +# func.type = expect(:ident).text.to_sym +# end + expect("{") + while matches(:def) + local = variable_definition() + func.locals << local.name.name + defs << local; + end + stmts = statement_list(["}", :return]) + stmts << return_statement + func.body = defs + stmts + expect("}"); + func + end + + def constant_definition(name) + error("constant definitions not yet supported") + end + + def function_arglist() + args = [] + expect("(") + while !matches(")") + id = identifier() +# expect(":") +# id.type = expect(:ident).text.to_sym + args << id + expect(",") if !matches(")") + end + expect(")") + args + end + + def identifier() + name = expect(:ident) + IR::Var.new(name.pos, nil, name.text.to_sym) + end + + + ## + # Statements + ## + def statement_list(terminators) + stmts = [] + while !matches_any(terminators) + stmts << statement + end + stmts + end + + def statement() + if matches(:set) + variable_assignment() + elsif matches(:assert) + assert_statement() + elsif matches(:if) + if_statement() + else + expression() + end + end + + def return_statement() + loc = location + if accept(:return) + IR::Return.new(loc, nil, expression()) + else + IR::Return.new(loc, nil, nil) + end + end + + def variable_definition() + loc = location + expect(:def) + name = identifier() + expect ("=") + value = expression() + IR::Def.new(loc, nil, name, value) + end + + def variable_assignment() + loc = location + expect(:set) + name = identifier() + if matches("[") + name = array_or_hash_access(name) + end + expect ("=") + value = expression() + IR::Set.new(loc, nil, name, value) + end + + def assert_statement() + loc = location + expect(:assert); + IR::Assert.new(loc, nil, expression()) + end + + def if_statement() + loc = location + expect(:if) + cond = expression() + then_branch = statement_block() + if accept(:else) + if matches(:if) + else_branch = [ if_statement() ] + else + else_branch = statement_block() + end + end + IR::If.new(loc, nil, cond, then_branch, else_branch) + end + + def statement_block() + loc = location + stmts = [] + expect("{") + while !matches("}") + stmts << statement() + end + expect("}") + return stmts + end + + + ## + # Expressions + ## + + def expression() + left = logical_expr() + if matches_any(["&&", "||"]) + loc = location + op = consume() + right = logical_expr() + left = make_binop(loc, op.text, left, right) + end + left + end + + + def logical_expr() + left = simple_expr() + if matches_any(["==", "!=", "<", "<=", ">", ">="]) + loc = location + op = consume() + right = simple_expr() + left = make_binop(loc, op.text, left, right) + end + left + end + + def simple_expr() + # Handle unary ops first + if matches_any(["+", "-", :not]) + loc = location + op = consume() + left = term() + left = make_unop(location, op.text, left, nil) + else + left = term() + end + + # now look for binary ops + while matches_any(["+", "-", :or]) + loc = location + op = consume() + right = term() + left = make_binop(loc, op.text, left, right) + end + left + end + + def term() + left = factor() + while matches_any(["*", "/", "%", :and]) + loc = location + op = consume() + right = factor(); + left = make_binop(loc, op.text, left, right) + end + left + end + + def make_binop(loc, op, left, right) + node = IR::Op.new(loc, nil, op, left, right) + node + end + + def make_unop(loc, op, value) + node = IR::Op.new(loc, nil, op, value, nil) + node + end + + def factor() + if accept("(") + expr = expression() + expect(")") + else + expr = const_or_ident() + end + + # Check for function call + if matches("(") + expr = func_call(expr) + elsif matches("[") + expr = array_or_hash_access(expr) + end + expr + end + + def func_call(expr) + call = IR::Call.new(location, nil, expr, []) + expect("(") + while !matches(")") + call.args << expression() + expect(",") if !matches(")") + end + expect(")") + call + end + + def array_or_hash_access(expr) + expect("[") + loc = location + key = expression() + expect("]") + make_binop(loc, "[", expr, key) + end + + def const_or_ident() + if matches("[") + array_literal() + elsif matches("{") + hash_literal() + else + tok = consume() + if tok.type == :bool + IR::Const.new(tok.pos, :bool, tok.text == "true") + elsif tok.type == :string + IR::Const.new(tok.pos, :string, tok.text) + elsif tok.type == :int + IR::Const.new(tok.pos, :int, tok.text.to_i) + elsif tok.type == :float + IR::Const.new(tok.pos, :float, tok.text.to_f) + elsif tok.type == :void + IR::Const.new(tok.pos, :void, :void) + elsif tok.type == :ident + IR::Var.new(tok.pos, nil, tok.text.to_sym) + else + error("invalid constant #{tok}") + end + end + end + + def array_literal() + exprs = [] + expect("[") + loc = location() + while !matches("]") + exprs << expression() + expect(",") if !matches("]") + end + expect("]") + IR::Const.new(loc, nil, exprs) + end + + def hash_literal() + dict = {} + expect("{") + loc = location() + while !matches("}") + key = string_or_ident() + expect(":") + val = expression() + dict[key] = val + expect(",") if !matches("}") + end + expect("}") + IR::Const.new(loc, nil, dict) + end + + def string_or_ident() + tok = consume() + if tok.type == :string + IR::Const.new(tok.pos, Value::String, tok.text) + elsif tok.type == :ident + IR::Const.new(tok.pos, Value::String, "\"#{tok.text}\"") + else + error("not a string or identifier: #{tok}") + end + end +end diff --git a/lib/sym_table.rb b/lib/sym_table.rb new file mode 100644 index 0000000..7228edb --- /dev/null +++ b/lib/sym_table.rb @@ -0,0 +1,67 @@ +## +# Symbol Table Definition +## +class SymTable + Symbol = Struct.new(:name, :loc, :kind, :type, :value) + + def initialize() + @scopes = [{}, {}] + end + + def builtin?(name) + (not local? name) and (not (@scopes[0] || {})[name].nil?) + end + + def global?(name) + (not local? name) and (not (@scopes[1] || {})[name].nil?) + end + + def parameter?(name) + (not local? name) and (not (@scopes[2] || {})[name].nil?) + end + + def local?(name) + find_sym((@scopes[3..-1] || []).flatten, name) + end + + def find_sym(scopes, name) + scopes.reverse_each.detect do |scope| + return scope[name] if scope[name] + end + nil + end + + def open_scope() + @scopes.push({}) + end + + def close_scope() + @scopes.pop + end + + def reset_scope() + @scopes = @scopes[0..1] + end + + def [](name) + find_sym(@scopes, name) + end + + def add_builtin(name, kind, type, value) + @scopes[0][name] = + Symbol.new(name, 0, kind, type, value) + end + + def add_sym(name, loc, kind, type, value) + @scopes.last[name] = + Symbol.new(name, loc, kind, type, value) + end + + def each(&block) + @scopes.last.each(&block) + end + + def globals + @scopes[1] + end +end diff --git a/lib/type_checker.rb b/lib/type_checker.rb new file mode 100644 index 0000000..2f628be --- /dev/null +++ b/lib/type_checker.rb @@ -0,0 +1,445 @@ +## +# Type Checking/Inference Module +## +class TypeChecker + BaseToTypes = { + :bool => Value::Bool, + :int => Value::Int, + :float => Value::Float, +# :string => Value::String, + } + + TypesToBase = { + Value::Bool => :bool, + Value::Int => :int, + Value::Float => :float, + } + + UnaryOps = { + "+" => { + int: [:int, :int], + float: [:float, :float], + }, + "-" => { + int: [:int, :int], + float: [:float, :float], + }, + "!" => { + bool: [:bool, :bool] + }, + } + + BinaryOps = { + "+" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + string: [:string, :string, :string], + }, + "-" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "*" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "/" => { + int: [:int, :int, :int], + float: [:float, :float, :float], + }, + "%" => { + int: [:int, :int, :int], +# Float modulo normally implemented as library function +# float: [:float, :float, :float], + }, + "<" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + }, + ">" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + }, + "<=" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + }, + ">=" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + bool: [:bool, :bool, :bool], + }, + "==" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + bool: [:bool, :bool, :bool], + string: [:string, :string, :bool], + }, + "!=" => { + int: [:int, :int, :bool], + float: [:float, :float, :bool], + bool: [:bool, :bool, :bool], + string: [:string, :string, :bool], + }, + "&&" => { bool: [:bool, :bool, :bool] }, + "||" => { bool: [:bool, :bool, :bool] }, + "<<" => { int: [:int, :int, :int] }, + ">>" => { int: [:int, :int, :int] }, + "&" => { int: [:int, :int, :int] }, + "^" => { int: [:int, :int, :int] }, + "|" => { int: [:int, :int, :int] }, + } + + def initialize(parser) + @parser = parser + env = parser.syms + parser.symbols.each do |sym, data| + type = infer(env, env[sym].value) + end + end + + def error(loc, msg) + @parser.error(msg, loc) + end + + def check(env, expr, type) + if expr.is_a? IR::Const + check_const(env, expr, type) + elsif (expr.is_a? IR::Op) + check_op(env, expr, type) + elsif expr.is_a? IR::Var + check_var(env, expr, type) +# elsif expr.is_a? IR::Let +# check_let(env, expr, type) +# elsif expr.is_a? IR::If +# check_ifexpr(env, expr, type) +# elsif expr.is_a? IR::Set +# check_set(env, expr, type) +# elsif expr.is_a? IR::Func +# check_func(env, expr, type) +# elsif expr.is_a? IR::Apply +# check_apply(env, expr, type) + else + etype = infer(env, expr) + if type != etype + error(expr.loc, "expected #{type}, received #{etype}") + end + end + expr.type = type + end + + def infer(env, expr) + if expr.is_a? IR::Func + infer_func(env, expr) + elsif expr.is_a? IR::Return + infer_return(env, expr) + elsif expr.is_a? IR::Const + infer_const(env, expr) + elsif expr.is_a? IR::Var + infer_var(env, expr) + elsif expr.is_a? IR::Def + infer_def(env, expr) + elsif expr.is_a? IR::Set + infer_set(env, expr) + elsif expr.is_a? IR::Op + infer_op(env, expr) + elsif expr.is_a? IR::Call + infer_call(env, expr) + elsif expr.is_a? IR::If + infer_if(env, expr) + elsif expr.is_a? IR::Assert + infer_assert(env, expr) + else + pp expr + error(expr.loc, "unable to determine type of expression") + end + end + + private + + ## + # TYPE INFERENCE + ## + + def infer_func(env, expr) + env = env.clone + @typevar = 0 + expr.args.each do |a| + a.type ||= make_typevar() + env.add_sym(a.name, a.loc, :arg, a.type) + end + expr.body.each do |expr| + infer(env, expr) + end + + +# infer(env, expr.body) + type = (expr.args + [expr.body.last]).map {|v| v.type } + type.unshift(:void) if type.length == 1 + + # the body may have inferred an arg type, fix it up here + expr.args.each_with_index do |a,i| + a.type = env[a.name][:type] + type[i] = a.type + end + expr.type = type + end + + def infer_return(env, expr) + if expr.type + expr.type = infer(env, expr.value) + else + expr.type = :void + end + end + + def infer_const(env, expr) + expr.type + end + + def infer_var(env, expr) + if env[expr.name].nil? + error(expr.loc, "symbol '#{expr.name}' not defined") + end + expr.type = env[expr.name][:type] + end + + def infer_def(env, expr) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def infer_set(env, expr) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def infer_op(env, expr) + # infer the operand type first + vtype = infer(env, expr.left) + if (expr.left and expr.right) + check_binary(env, expr, vtype) + else + check_unary(env, expr, vtype) + end + end + + def infer_call(env, expr) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def infer_if(env, expr) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def infer_assert(env, expr) + check(env, expr.value, :bool) + expr.type = :void + end + + ## + # TYPE CHECKING + ## + + + def check_func(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_return(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_const(env, expr, type) + if expr.type != type + error(expr.loc, "expected #{type}, received #{expr.type}") + end + type + end + + def check_var(env, expr, type) + etype = infer(env, expr) + if (etype.class == String) + expr.type = type + env.set_type(expr.name, type) + elsif expr.type != type + error(expr.loc, "expected #{type}, received #{etype}") + end + type + end + + def check_def(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_set(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_op(env, expr, type) + expr.type = expr.type || infer(env, expr) + if expr.type != type + error(expr.loc, "expect #{type}, received #{expr.type}") + end + type + end + + def check_call(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + def check_if(env, expr, type) + raise "unimplemented" + error(expr.loc, "unimplemented") + end + + + +# def make_typevar() +# @typevar ||= 0 +# var = "abcdefghijklmnopqrstuvwxyz"[@typevar] +# @typevar += 1 +# var +# end +# +# def var?(expr) +# expr.class == IR::Var +# end +# +# def untyped_global_func?(env, func, type) +# type.nil? and +# var?(func) and +# env.global?(func.name) and +# env[func.name][:value] +# end +# +# def check_apply(env, expr, type) +# # Handle global functions that haven't been typed yet but are +# # being called. We pause to infer their type. +# if untyped_global_func?(env, expr.func, type) +# value = env[expr.func.name][:value] +# env[expr.func.name][:value] = nil +# infer(@parser.syms, value) +# type = infer(env, expr.func) +# end +# +# error(expr.loc, "object being applied is not a function (has type: #{type.to_s})") if not type.is_a? Array +# error(expr.loc, "wrong number of arguments to function call") if (type.length - 1) != expr.args.length +# type[0..-2].each_with_index do |t,i| +# check(env, expr.args[i], t) +# end +# expr.type = type.last +# end +# +# def check_ifexpr(env, expr, type) +# check(env, expr.cond, :bool) +# check(env, expr.then, type) +# check(env, expr.else, type) +# end +# +# +# def verify(cond, loc, msg) +# error(loc, msg) if not cond +# end +# +# def check_func(env, expr, type) +# env = env.clone +# verify((type.length-1) == expr.args.length, expr.loc, +# "incorrect number of arguments (#{expr.args.length}, expected #{(type.length-1)}") +# expr.args.each_with_index do |a,i| +# env.add_sym(a.name, a.loc, :arg, type[i]) +# end +# check(env, expr.expr, type.last) +# end +# +# def check_let(env, let, type) +# env = env.clone +# env.add_sym(let.var.name, let.var.loc, :var, let.var.type) +# check(env, let.expr, type) +# end + +# +# def infer_let(env, let) +# if let.body.nil? +# infer_decl(env, let) +# else +# infer_let_expr(env, let) +# end +# end +# +# def infer_decl(env, let) +# env = env.clone +# # handle the binding +# if let.var.type +# check(env, let.expr, let.var.type) +# else +# let.var.type = infer(env, let.expr) +# end +# env.set_type(let.var.name, let.var.type) +# env[let.var.name][:value] = nil +# let.type = :void +# end +# +# def infer_let_expr(env, let) +# env = env.clone +# +# # handle the binding +# if let.var.type +# check(env, let.expr, let.var.type) +# else +# let.var.type = infer(env, let.expr) +# end +# +# env.add_sym(let.var.name, let.var.loc, :var, let.var.type) +# let.type = infer(env, let.body) +# end +# +# def infer_ifexpr(env, expr) +# check(env, expr, infer(env, expr.then)) +# end +# +# def infer_set(env, expr) +# error(expr.loc, "infer_set unimplemented") +# end +# +# +# def infer_apply(env, expr) +# if expr.func.is_a? String +# expr.type = infer_opcall(env, expr) +# else +# type = infer(env, expr.func) +# check_apply(env, expr, type) +# end +# end +# +# def assign_type(env, var, type) +# if var.class == IR::Var and (var.type.nil? or var.type == String) then +# var.type = type +# env[var.name][:type] = type +# end +# end + + + def check_unary(env, expr, vtype) + optype = UnaryOps[expr.op][TypesToBase[vtype]] + error(expr.loc, "unknown unary operation '#{expr.op}' for operand type #{vtype}") if not optype + optype = optype.map{|v| BaseToTypes[v] } + check(env, expr.left, optype[0]) + expr.type = optype[-1] + end + + def check_binary(env, expr, vtype) + optype = BinaryOps[expr.op][vtype] + error(expr.loc, "unknown binary operation '#{expr.op}' for operand type #{vtype}") if optype.nil? + check(env, expr.left, optype[0]) + check(env, expr.right, optype[1]) + expr.type = optype.last + end +end diff --git a/lib/value.rb b/lib/value.rb new file mode 100644 index 0000000..5fb8494 --- /dev/null +++ b/lib/value.rb @@ -0,0 +1,15 @@ +## +# Data Type Representation Module +## +module Value + # Forms: :bool, :int, :float, :void, :array, :record, :string, :proc + Type = Struct.new(:form, :fields, :base, :size) + Field = Struct.new(:type, :offset) + + # Base type definitions + Void = Type.new(:void, nil, nil, 0) + Bool = Type.new(:bool, nil, nil, 1) + Int = Type.new(:int, nil, nil, 8) + Float = Type.new(:float, nil, nil, 8) + String = Type.new(:string, nil, nil, 8) +end -- 2.54.0