--- /dev/null
+#!/usr/bin/env ruby
+
+require 'stringio'
+require 'strscan'
+
+class Lexer
+ Tok = Struct.new(:text, :file, :pos, :type)
+
+ attr_accessor :file
+ attr_accessor :data
+
+ def initialize(path = nil)
+ @file = (path || "<input>")
+ @text = (path ? File.read(path) : "")
+ @data = StringScanner.new(@text)
+ end
+
+ def parse_string(str)
+ @file = "<input>"
+ @text = str
+ @data = StringScanner.new(@text)
+ end
+
+ def linenum(pos = nil)
+ @text[0..(pos || @data.pos)].count("\n") + 1
+ end
+
+ SPACE = /([ \t\v\n\r]+|#.*\n)/
+ IDENT = /[_a-zA-Z][_a-zA-Z0-9]*!?/
+ BRACES = /[\(\)\[\]\{\}\.]/
+ OPERATORS = /[:,<>*\/=+\-\$?!]+/
+ INTEGER = /[0-9]+/
+ FLOATING = /[0-9]+\.[0-9]+/
+ STRING = /"(\\"|[^"])*"/
+ ID_TYPES = {
+ "true" => :bool,
+ "false" => :bool,
+ "if" => :if,
+ "then" => :then,
+ "else" => :else,
+ "end" => :end,
+ "let" => :let,
+ "func" => :func,
+ "in" => :in,
+ "set!" => :set,
+ "is" => :is,
+ "return" => :return,
+ }
+
+ def next
+ while @data.skip(SPACE) do
+ end
+ if not @data.eos?
+ type = :eof
+ if @data.scan(IDENT)
+ type = (ID_TYPES[@data.matched] || :ident)
+ elsif @data.scan(FLOATING)
+ type = :float
+ elsif @data.scan(INTEGER)
+ type = :int
+ elsif @data.scan(STRING)
+ type = :string
+ elsif @data.scan(BRACES)
+ type = @data.matched
+ elsif @data.scan(OPERATORS)
+ type = @data.matched
+ end
+ Tok.new(@data.matched, @file, @data.pos, type) if type
+ else
+ Tok.new("EOF", @file, @data.pos, :eof)
+ end
+ end
+end
+
+class SymTable < Hash
+ attr_accessor :parent
+
+ def initialize(parent = nil)
+ @global_scope = true if not parent.nil?
+ @parent = (parent || {})
+ end
+
+ def clone()
+ SymTable.new(self)
+ end
+
+ def [](key)
+ (super(key) || @parent[key])
+ end
+
+ def defined?(key)
+ not self[key].nil?
+ end
+
+ def typed?(key)
+ not (self[key] || {})[:type].nil?
+ end
+
+ def add_sym(name, loc, kind, type = nil)
+ self[name] = {
+ loc: loc,
+ kind: kind,
+ type: type
+ }
+ end
+
+ def set_type(name, type)
+ (self[name] || {})[:type] = type
+ end
+
+ def global?(name)
+ if self[name]
+ @global_scope
+ elsif @parent.class == SymTable
+ @parent.global?(name)
+ else
+ nil
+ end
+ end
+
+ def depth
+ count = 0
+ env = self
+ while env.class != Hash
+ count += 1
+ env = env.parent
+ end
+ count
+ end
+
+# def local(key)
+# method(:[]).super_method.call(key)
+# end
+#
+# def defined_ever?(key)
+# (not (self[key] || {})[:type].nil?)
+# end
+#
+# def defined_locally?(key)
+# (not (local(key) || {})[:type].nil?)
+# end
+#
+# def block_local?(key)
+# (not local(key).nil?)
+# end
+#
+# def global?(key)
+# if @parent.class == Hash
+# block_local?(key)
+# elsif block_local? key
+# false
+# else
+# @parent.global? key
+# end
+# end
+#
+# def local?(key)
+# if @parent.class == Hash
+# false
+# elsif @type == :func
+# block_local? key
+# else
+# @parent.local? key
+# end
+# end
+#
+# def free?(key)
+# (defined_ever? key) and (not local? key) and (not global? key)
+# end
+#
+# def merge!(env)
+# env.each {|k,v| self[k] = v }
+# end
+#
+# def annotate(key, type)
+# self[key] ||= {}
+# self[key][:ann] = type
+# end
+#
+# def annotation(key)
+# (method(:[]).super_method.call(key) || {})[:ann]
+# end
+#
+# def stack()
+# parent = (@parent.is_a?(SymTable) ? @parent.stack : [])
+# ([self] + parent).flatten
+# end
+end
+
+module IR
+ Func = Struct.new(:loc, :type, :args, :body)
+ Return = Struct.new(:loc, :type, :value)
+ Const = Struct.new(:loc, :type, :value)
+ Var = Struct.new(:loc, :type, :name)
+
+ Def = Struct.new(:loc, :type, :name, :value)
+
+# EnvRef = Struct.new(:loc, :type, :index)
+# Let = Struct.new(:loc, :type, :var, :expr, :body)
+# If = Struct.new(:loc, :type, :cond, :then, :else)
+# Set = Struct.new(:loc, :type, :var, :expr)
+# Apply = Struct.new(:loc, :type, :func, :args)
+end
+
+
+class TypeChecker
+ UnaryOps = {
+ "+" => {
+ int: [:int, :int],
+ float: [:float, :float],
+ },
+ "-" => {
+ int: [:int, :int],
+ float: [:float, :float],
+ },
+ "!" => {
+ bool: [:bool, :bool]
+ },
+ }
+
+ BinaryOps = {
+ "+" => {
+ int: [:int, :int, :int],
+ float: [:float, :float, :float],
+ },
+ "-" => {
+ int: [:int, :int, :int],
+ float: [:float, :float, :float],
+ },
+ "*" => {
+ int: [:int, :int, :int],
+ float: [:float, :float, :float],
+ },
+ "/" => {
+ int: [:int, :int, :int],
+ float: [:float, :float, :float],
+ },
+ "%" => {
+ int: [:int, :int, :int],
+# Float modulo normally implemented as library function
+# float: [:float, :float, :float],
+ },
+ "<" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ },
+ ">" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ },
+ "<=" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ },
+ ">=" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ },
+ "==" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ string: [:string, :string, :bool],
+ },
+ "!=" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ string: [:string, :string, :bool],
+ },
+ "&&" => { bool: [:bool, :bool, :bool] },
+ "||" => { bool: [:bool, :bool, :bool] },
+ "<<" => { int: [:int, :int, :int] },
+ ">>" => { int: [:int, :int, :int] },
+ "&" => { int: [:int, :int, :int] },
+ "^" => { int: [:int, :int, :int] },
+ "|" => { int: [:int, :int, :int] },
+ }
+
+ def initialize(parser)
+ @parser = parser
+ end
+
+ def error(loc, msg)
+ @parser.error(msg, loc)
+ end
+
+ def check(env, expr, type)
+ if (expr.is_a? IR::If)
+ check_ifexpr(env, expr, type)
+ elsif (expr.is_a? IR::Func)
+ check_func(env, expr, type)
+ elsif (expr.is_a? IR::Var)
+ check_var(env, expr, type)
+ elsif (expr.is_a? IR::Let)
+ check_let(env, expr, type)
+# elsif (expr.is_a? IR::Apply)
+# check_apply(env, expr, type)
+ else
+ etype = infer(env, expr)
+ if type != etype
+ error(expr.loc, "expected #{type}, received #{etype}")
+ end
+ end
+ expr.type = type
+ end
+
+ def infer(env, expr)
+ if expr.is_a? IR::Const
+ infer_const(env, expr)
+ elsif expr.is_a? IR::Var
+ infer_var(env, expr)
+ elsif expr.is_a? IR::Let
+ infer_let(env, expr)
+ elsif expr.is_a? IR::If
+ infer_ifexpr(env, expr)
+ elsif expr.is_a? IR::Set
+ infer_set(env, expr)
+ elsif expr.is_a? IR::Func
+ infer_func(env, expr)
+ elsif expr.is_a? IR::Apply
+ infer_apply(env, expr)
+ else
+ error(expr.loc, "unable to determine type of expression")
+ end
+ end
+
+ private
+
+ def make_typevar()
+ @typevar ||= 0
+ var = "abcdefghijklmnopqrstuvwxyz"[@typevar]
+ @typevar += 1
+ var
+ end
+
+ def var?(expr)
+ expr.class == IR::Var
+ end
+
+ def untyped_global_func?(env, func, type)
+ type.nil? and
+ var?(func) and
+ env.global?(func.name) and
+ env[func.name][:value]
+ end
+
+ def check_apply(env, expr, type)
+ # Handle global functions that haven't been typed yet but are
+ # being called. We pause to infer their type.
+ if untyped_global_func?(env, expr.func, type)
+ value = env[expr.func.name][:value]
+ env[expr.func.name][:value] = nil
+ infer(@parser.syms, value)
+ type = infer(env, expr.func)
+ end
+
+ error(expr.loc, "object being applied is not a function (has type: #{type.to_s})") if not type.is_a? Array
+ error(expr.loc, "wrong number of arguments to function call") if (type.length - 1) != expr.args.length
+ type[0..-2].each_with_index do |t,i|
+ check(env, expr.args[i], t)
+ end
+ expr.type = type.last
+ end
+
+ def check_ifexpr(env, expr, type)
+ check(env, expr.cond, :bool)
+ check(env, expr.then, type)
+ check(env, expr.else, type)
+ end
+
+ def check_var(env, expr, type)
+ etype = infer(env, expr)
+ if (etype.class == String)
+ expr.type = type
+ env.set_type(expr.name, type)
+ elsif expr.type != type
+ error(expr.loc, "expected #{type}, received #{etype}")
+ end
+ type
+ end
+
+ def verify(cond, loc, msg)
+ error(loc, msg) if not cond
+ end
+
+ def check_func(env, expr, type)
+ env = env.clone
+ verify((type.length-1) == expr.args.length, expr.loc,
+ "incorrect number of arguments (#{expr.args.length}, expected #{(type.length-1)}")
+ expr.args.each_with_index do |a,i|
+ env.add_sym(a.name, a.loc, :arg, type[i])
+ end
+ check(env, expr.expr, type.last)
+ end
+
+ def check_let(env, let, type)
+ env = env.clone
+ env.add_sym(let.var.name, let.var.loc, :var, let.var.type)
+ check(env, let.expr, type)
+ end
+
+ def infer_const(env, expr)
+ expr.type
+ end
+
+ def infer_var(env, expr)
+ if not env.defined?(expr.name)
+ error(expr.loc, "symbol '#{expr.name}' not defined")
+ end
+ expr.type = env[expr.name][:type]
+ end
+
+ def infer_let(env, let)
+ if let.body.nil?
+ infer_decl(env, let)
+ else
+ infer_let_expr(env, let)
+ end
+ end
+
+ def infer_decl(env, let)
+ env = env.clone
+ # handle the binding
+ if let.var.type
+ check(env, let.expr, let.var.type)
+ else
+ let.var.type = infer(env, let.expr)
+ end
+ env.set_type(let.var.name, let.var.type)
+ env[let.var.name][:value] = nil
+ let.type = :void
+ end
+
+ def infer_let_expr(env, let)
+ env = env.clone
+
+ # handle the binding
+ if let.var.type
+ check(env, let.expr, let.var.type)
+ else
+ let.var.type = infer(env, let.expr)
+ end
+
+ env.add_sym(let.var.name, let.var.loc, :var, let.var.type)
+ let.type = infer(env, let.body)
+ end
+
+ def infer_ifexpr(env, expr)
+ check(env, expr, infer(env, expr.then))
+ end
+
+ def infer_set(env, expr)
+ error(expr.loc, "infer_set unimplemented")
+ end
+
+ def infer_func(env, expr)
+ env = env.clone
+ @typevar = 0
+ expr.args.each do |a|
+ a.type ||= make_typevar()
+ env.add_sym(a.name, a.loc, :arg, a.type)
+ end
+ infer(env, expr.expr)
+ type = (expr.args + [expr.expr]).map {|v| v.type }
+ type.unshift(:void) if type.length == 1
+
+ # the body may have inferred an arg type, fix it up here
+ expr.args.each_with_index do |a,i|
+ a.type = env[a.name][:type]
+ type[i] = a.type
+ end
+ expr.type = type
+ end
+
+ def infer_apply(env, expr)
+ if expr.func.is_a? String
+ expr.type = infer_opcall(env, expr)
+ else
+ type = infer(env, expr.func)
+ check_apply(env, expr, type)
+ end
+ end
+
+ def assign_type(env, var, type)
+ if var.class == IR::Var and (var.type.nil? or var.type == String) then
+ var.type = type
+ env[var.name][:type] = type
+ end
+ end
+
+ def infer_opcall(env, expr)
+ # infer the operand type first
+ vtype = infer(env, expr.args[0])
+ if (not vtype or vtype.class == String) and expr.args.length == 2
+ vtype = infer(env, expr.args[1])
+ end
+
+ # use the operand type to pick op type and check it
+ if expr.args.length == 1
+ check_unary(env, expr, vtype)
+
+ elsif expr.args.length == 2
+ check_binary(env, expr, vtype)
+ else
+ error(expr.loc, "too many operands for operator '#{expr.func}'")
+ end
+ end
+
+ def check_unary(env, expr, vtype)
+ optype = UnaryOps[expr.func][vtype]
+ error(expr.loc, "unknown unary operation '#{expr.func}' for operand type #{vtype}") if not optype
+ check_apply(env, expr, optype)
+ end
+
+ def check_binary(env, expr, vtype)
+ optype = BinaryOps[expr.func][vtype]
+ error(expr.loc, "unknown binary operation '#{expr.func}' for operand type #{vtype}") if optype.nil?
+
+ expr.args.each_with_index do |a, i|
+ assign_type(env, a, optype[i])
+ end
+ check_apply(env, expr, optype)
+ end
+end
+
+
+class Parser
+ attr_accessor :exprs
+ attr_accessor :syms
+
+ def initialize(path = nil)
+ @syms = SymTable.new
+ parse_file(path)
+ syms.add_sym(:void, 0, :type, :void)
+ syms.add_sym(:bool, 0, :type, :bool)
+ syms.add_sym(:int, 0, :type, :int)
+ syms.add_sym(:string, 0, :type, :string)
+ syms.add_sym(:float, 0, :type, :float)
+ parse_file(path)
+ end
+
+ def parse_string(str)
+ @lex = Lexer.new()
+ @lex.parse_string(str)
+ @prev = nil
+ @next = nil
+ toplevel
+ end
+
+ def parse_file(path)
+ @lex = Lexer.new(path)
+ @prev = nil
+ @next = nil
+ toplevel
+ end
+
+ def toplevel
+ raise "toplevel() must be implemented by inheriting class"
+ end
+
+ def error(str, loc = nil)
+ file, pos = (loc ? [@lex.file, loc] : [@lex.file, (@next || @prev).pos])
+ $stderr.puts "#{file}:#{@lex.linenum(pos)}: #{str}"
+ exit 1
+ end
+
+ def peek()
+ @next = @lex.next if @next.nil?
+ @next
+ end
+
+ def matches(type)
+ (peek().type == type)
+ end
+
+ def matches_any(types)
+ types.any? {|type| peek().type == type }
+ end
+
+ def accept(type)
+ if (matches(type))
+ @prev = @next
+ @next = nil
+ true
+ else
+ false
+ end
+ end
+
+ def expect(type)
+ tok = peek()
+ if not accept(type)
+ error("expected '#{type}', received '#{tok.type}'")
+ end
+ tok
+ end
+
+ def consume()
+ expect(peek().type)
+ end
+
+ def eof?
+ (peek().type == :eof)
+ end
+
+ def location()
+ if @prev
+ [@prev.file, @prev.pos]
+ else
+ [@lex.file, 0]
+ end
+ end
+
+
+
+
+
+ OPERATORS = {
+ "+" => true,
+ "-" => true,
+ "*" => true,
+ "/" => true,
+ "%" => true,
+ }
+
+ def operator?
+ OPERATORS[peek().type]
+ end
+
+ def toplevel
+ imports
+ exports
+ @defs = {}
+ while !matches(:eof)
+ mod_def = toplevel_defintion()
+ @defs[mod_def.name.text] = mod_def
+
+# if decl.class == IR::Let
+# syms[decl.var.name][:value] = decl
+# end
+# @exprs << decl
+ end
+
+# @type_checker = TypeChecker.new(self)
+# @exprs = []
+# @exprs = @exprs.compact
+# @exprs.each do |e|
+# @type_checker.infer(syms, e)
+# end
+# pp syms
+ pp @defs
+ end
+
+ def imports
+ # TBD
+ end
+
+ def exports
+ # TBD
+ end
+
+ def toplevel_defintion
+ ident = identifier()
+ if matches("(") then
+ val = function_definition(ident)
+ elsif matches("=") then
+ val = constant_definition(ident)
+ elsif matches(":") then
+ val = type_annotation(ident)
+ elsif matches(:is)
+ val = type_definition(ident)
+ else
+ error("#{ident.name} is not a valid toplevel definition")
+ end
+ IR::Def.new(name.loc, nil, ident, val)
+ end
+
+ def function_definition(name)
+ args = function_arglist()
+ func = IR::Func.new(name.loc, nil, args, [])
+ expect("{")
+ expect(:return);
+ if !matches("}")
+ func.body << IR::Return.new(location, nil, expression())
+ end
+ expect("}");
+ end
+
+ def constant_definition(name)
+ error("constant definitions not yet supported")
+ end
+
+ def type_definition(name)
+ error("type definitions not yet supported")
+ end
+
+ def type_annotation(name)
+ error("type annotations not yet supported")
+ end
+
+
+ def function_arglist()
+ args = []
+ expect("(")
+ while !matches(")")
+ args << identifier()
+ expect(",") if !matches(")")
+ end
+ expect(")")
+ args
+ end
+
+ def identifier()
+ name = expect(:ident)
+ IR::Var.new(name.pos, nil, name.text.to_sym)
+ end
+
+ def expression()
+ constant()
+ end
+
+ def constant()
+ tok = consume()
+ if tok.type == :bool
+ IR::Const.new(tok.pos, :bool, tok.text == "true")
+ elsif tok.type == :string
+ IR::Const.new(tok.pos, :string, tok.text)
+ elsif tok.type == :int
+ IR::Const.new(tok.pos, :int, tok.text.to_i)
+ elsif tok.type == :float
+ IR::Const.new(tok.pos, :float, tok.text.to_f)
+ elsif tok.type == :void
+ IR::Const.new(tok.pos, :void, :void)
+ else
+ error("invalid constant")
+ end
+ end
+
+
+
+# def declaration()
+# if matches(:ident)
+# expr = ident()
+# expr.type = syms[expr.name][:type] if syms[expr.name]
+# if accept("=")
+# value = expression()
+# syms.add_sym(expr.name, expr.loc, :var, expr.type)
+# IR::Let.new(expr.loc, nil, expr, value, nil)
+# elsif accept(":")
+# expr.type = type_spec()
+# syms.add_sym(expr.name, expr.loc, :var, expr.type)
+# nil
+# else
+# expression()
+# end
+# else
+# expression()
+# end
+# end
+#
+# def expression()
+# if matches(:let)
+# let_expr()
+# else
+# complex_expr()
+# end
+# end
+#
+# def complex_expr()
+# expr = nil
+# if matches(:if)
+# expr = if_expr()
+# elsif matches(:set)
+# expr = var_set()
+# elsif matches_any(["+","-", "!"])
+# expr = unop_call()
+# else
+# expr = atomic_expr()
+# if matches("(")
+# expr = func_call(expr)
+# elsif operator?
+# expr = binop_call(expr)
+# end
+# end
+# expr
+# end
+#
+# def atomic_expr()
+# if matches(:func)
+# func_expr()
+# elsif matches(:ident)
+# ident()
+# else
+# constant()
+# end
+# end
+#
+#
+#
+#
+# def application()
+## expr = atomic_expr()
+## expr = func_call(expr) if matches("(")
+# # EQ, NEQ, '<', LTEQ, '>', GTEQ
+# end
+#
+# def simple_expr()
+# # '+', '-', OR
+# end
+#
+# def term()
+# # '*', '/', '%', AND
+# end
+#
+# def factor()
+# # '(', NOT, atomic
+# end
+#
+#
+#
+#
+# def unop_call()
+# op = consume()
+# rhs = atomic_expr()
+# IR::Apply.new(op.pos, nil, op.type, [rhs])
+# end
+#
+# def binop_call(expr)
+# op = consume()
+# rhs = atomic_expr()
+# IR::Apply.new(expr.loc, nil, op.type, [expr, rhs])
+# end
+#
+# def if_expr()
+# loc = expect(:if).pos
+# cond = atomic_expr()
+# expect(:then)
+# branch1 = expression()
+# expect(:else)
+# branch2 = expression()
+# IR::If.new(loc, nil, cond, branch1, branch2)
+# end
+#
+# def var_set()
+# loc = expect(:set).pos
+# name = ident()
+# expect("=")
+# expr = expression()
+# IR::Set.new(loc, nil, name, expr)
+# end
+#
+# def func_call(func)
+# args = []
+# expect("(")
+# while !matches(")")
+# args << atomic_expr()
+# expect(",") if not matches(")")
+# end
+# expect(")")
+# IR::Apply.new(func.loc, nil, func, args)
+# end
+#
+# def func_expr()
+# loc = expect(:func).pos
+# args = []
+# expect("(")
+# while !matches(")")
+# args << ident()
+# args.last.type = type_spec() if accept(":")
+# expect(",") if not matches(")")
+# end
+# expect(")")
+# type = type_spec() if accept(":")
+# body = expression()
+# IR::Func.new(loc, type, args, body)
+# end
+#
+# def ident()
+# name = expect(:ident)
+# IR::Var.new(name.pos, nil, name.text.to_sym)
+# end
+#
+# def typename()
+# name = expect(:ident)
+# sym = syms[name.text]
+# if sym && sym[:kind] != :type
+# error(name.pos, "#{name.text} is not a valid type")
+# end
+# IR::Var.new(name.pos, nil, name.text.to_sym)
+# end
+#
+#
+# def let_expr()
+# expect(:let)
+# name = ident()
+# name.type = type_spec() if accept(":")
+# expect("=")
+# expr = complex_expr()
+# expect(:in)
+# body = expression()
+# IR::Let.new(name.loc, nil, name, expr, body)
+# end
+#
+# def type_spec()
+# type = [typename().name]
+# while accept("->")
+# type << typename().name
+# end
+# (type.length == 1 ? type[0] : type)
+# end
+end
+
+$parser = Parser.new("cerise.m")
+pp $parser