--- /dev/null
+#!/usr/bin/env ruby
+require 'stringio'
+require_relative "utils/base_lexer"
+require_relative "utils/base_parser"
+require_relative "utils/anf"
+
+class Lexer < BaseLexer
+ SPACE = /([ \t\v\n\r]+|#.*\n)/
+ IDENT = /[_a-zA-Z][_a-zA-Z0-9]*!?/
+ BRACES = /[\(\)\[\]\{\}\.]/
+ OPERATORS = /[:,<>*\/=+\-\$?!]+/
+ INTEGER = /[0-9]+/
+ FLOATING = /[0-9]+\.[0-9]+/
+ STRING = /"(\\"|[^"])*"/
+ ID_TYPES = {
+ "true" => :bool,
+ "false" => :bool,
+ "if" => :if,
+ "then" => :then,
+ "else" => :else,
+ "end" => :end,
+ "let" => :let,
+ "func" => :func,
+ "in" => :in,
+ "set!" => :set,
+ }
+
+ def next
+ while @data.skip(SPACE) do
+ end
+ if not @data.eos?
+ type = :eof
+ if @data.scan(IDENT)
+ type = (ID_TYPES[@data.matched] || :ident)
+ elsif @data.scan(FLOATING)
+ type = :float
+ elsif @data.scan(INTEGER)
+ type = :int
+ elsif @data.scan(STRING)
+ type = :string
+ elsif @data.scan(BRACES)
+ type = @data.matched
+ elsif @data.scan(OPERATORS)
+ type = @data.matched
+ end
+ Tok.new(@data.matched, @file, @data.pos, type) if type
+ else
+ Tok.new("EOF", @file, @data.pos, :eof)
+ end
+ end
+end
+
+class TypeChecker
+ UnaryOps = {
+ "+" => {
+ int: [:int, :int],
+ float: [:float, :float],
+ },
+ "-" => {
+ int: [:int, :int],
+ float: [:float, :float],
+ },
+ "!" => {
+ bool: [:bool, :bool]
+ },
+ }
+
+ BinaryOps = {
+ "+" => {
+ int: [:int, :int, :int],
+ float: [:float, :float, :float],
+ },
+ "-" => {
+ int: [:int, :int, :int],
+ float: [:float, :float, :float],
+ },
+ "*" => {
+ int: [:int, :int, :int],
+ float: [:float, :float, :float],
+ },
+ "/" => {
+ int: [:int, :int, :int],
+ float: [:float, :float, :float],
+ },
+ "%" => {
+ int: [:int, :int, :int],
+ float: [:float, :float, :float],
+ },
+ "<" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ },
+ ">" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ },
+ "<=" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ },
+ ">=" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ },
+ "==" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ string: [:string, :string, :bool],
+ },
+ "!=" => {
+ int: [:int, :int, :bool],
+ float: [:float, :float, :bool],
+ string: [:string, :string, :bool],
+ },
+ "&&" => { bool: [:bool, :bool, :bool] },
+ "||" => { bool: [:bool, :bool, :bool] },
+ "<<" => { int: [:int, :int, :int] },
+ ">>" => { int: [:int, :int, :int] },
+ "&" => { int: [:int, :int, :int] },
+ "^" => { int: [:int, :int, :int] },
+ "|" => { int: [:int, :int, :int] },
+ }
+
+ def initialize(parser)
+ @parser = parser
+ end
+
+ def error(loc, msg)
+ @parser.error(msg, loc)
+ end
+
+ def check(env, expr, type)
+ if (expr.is_a? ANF::If)
+ check_ifexpr(env, expr, type)
+# elsif (expr.is_a? ANF::Func)
+# check_func(env, expr, type)
+ elsif (expr.is_a? ANF::Var)
+ check_var(env, expr, type)
+ elsif (expr.is_a? ANF::Let)
+ check_let(env, expr, type)
+ else
+ etype = infer(env, expr)
+ if type != etype
+ error(expr.loc, "expected #{type}, received #{etype}")
+ end
+ end
+ expr.type = type
+ end
+
+ def infer(env, expr)
+ if expr.is_a? ANF::Const
+ infer_const(env, expr)
+ elsif expr.is_a? ANF::Var
+ infer_var(env, expr)
+ elsif expr.is_a? ANF::Let
+ infer_let(env, expr)
+ elsif expr.is_a? ANF::If
+ infer_ifexpr(env, expr)
+ elsif expr.is_a? ANF::Set
+ infer_set(env, expr)
+ elsif expr.is_a? ANF::Func
+ infer_func(env, expr)
+ elsif expr.is_a? ANF::Apply
+ infer_apply(env, expr)
+ else
+ error(expr.loc, "unable to determine type of expression")
+ end
+ end
+
+ private
+
+ def make_typevar()
+ @typevar ||= 0
+ var = "abcdefghijklmnopqrstuvwxyz"[@typevar]
+ @typevar += 1
+ var
+ end
+
+ def var?(expr)
+ expr.class == ANF::Var
+ end
+
+ def untyped_global_func?(env, func, type)
+ type.nil? and
+ var?(func) and
+ env.global?(func.name) and
+ env[func.name][:value]
+ end
+
+ def check_apply(env, expr, type)
+ # Handle global functions that haven't been typed yet but are
+ # being called. We pause to infer them. This probably causes
+ # a loop on recursive functions. Break that later.
+ if untyped_global_func?(env, expr.func, type)
+ value = env[expr.func.name][:value]
+ env[expr.func.name][:value] = nil
+ infer(@parser.syms, value)
+ type = infer(env, expr.func)
+
+ pp "CHECK_APPLY", expr.func.name, type, expr.func, value
+ end
+
+ error(expr.loc, "object being applied is not a function (has type: #{type.to_s})") if not type.is_a? Array
+ error(expr.loc, "wrong number of arguments to function call") if (type.length - 1) != expr.args.length
+ type[0..-2].each_with_index do |t,i|
+ check(env, expr.args[i], t)
+ end
+ expr.type = type.last
+ end
+
+ def check_ifexpr(env, expr, type)
+ check(env, expr.cond, :bool)
+ check(env, expr.then, type)
+ check(env, expr.else, type)
+ end
+
+ def check_var(env, expr, type)
+ etype = infer(env, expr)
+ if (etype.class == String)
+ expr.type = type
+ env.set_type(expr.name, type)
+ elsif expr.type != type
+ error(expr.loc, "expected #{type}, received #{etype}")
+ end
+ type
+ end
+
+# def check_func(env, expr, type)
+# end
+
+# def check_let(env, expr, type)
+# end
+
+ def infer_const(env, expr)
+ expr.type
+ end
+
+ def infer_var(env, expr)
+ if not env.defined?(expr.name)
+ error(expr.loc, "symbol '#{expr.name}' not defined")
+ end
+ expr.type = env[expr.name][:type]
+ end
+
+ def infer_let(env, let)
+ if let.body.nil?
+ infer_decl(env, let)
+ else
+ infer_let_expr(env, let)
+ end
+ end
+
+ def infer_decl(env, let)
+ env = env.clone
+
+ # handle the binding
+ if let.var.type
+ check(env, let.expr, let.var.type)
+ else
+ let.var.type = infer(env, let.expr)
+ end
+
+ env.set_type(let.var.name, let.var.type)
+ env[let.var.name][:value] = nil
+ let.type = :void
+ end
+
+ def infer_let_expr(env, let)
+ env = env.clone
+
+ # handle the binding
+ if let.var.type
+ check(env, let.expr, let.var.type)
+ else
+ let.var.type = infer(env, let.expr)
+ end
+
+ env.add_sym(let.var.name, let.var.loc, :var, let.var.type)
+ let.type = infer(env, let.body)
+ end
+
+ def infer_ifexpr(env, expr)
+ check(env, expr, infer(env, expr.then))
+ end
+
+ def infer_set(env, expr)
+ error(expr.loc, "infer_set unimplemented")
+ end
+
+ def infer_func(env, expr)
+ env = env.clone
+ @typevar = 0
+ expr.args.each do |a|
+ a.type ||= make_typevar()
+ env.add_sym(a.name, a.loc, :arg, a.type)
+ end
+ infer(env, expr.expr)
+ type = (expr.args + [expr.expr]).map {|v| v.type }
+ type.unshift(:void) if type.length == 1
+
+ # the body may have inferred an arg type, fix it up here
+ expr.args.each_with_index do |a,i|
+ a.type = env[a.name][:type]
+ type[i] = a.type
+ end
+ expr.type = type
+ end
+
+ def infer_apply(env, expr)
+ if expr.func.is_a? String
+ expr.type = infer_opcall(env, expr)
+ else
+ type = infer(env, expr.func)
+ check_apply(env, expr, type)
+ end
+ end
+
+ def assign_type(env, var, type)
+ if var.class == ANF::Var and (var.type.nil? or var.type == String) then
+ var.type = type
+ env[var.name][:type] = type
+ end
+ end
+
+ def infer_opcall(env, expr)
+ # infer the operand type first
+ vtype = infer(env, expr.args[0])
+ if (not vtype or vtype.class == String) and expr.args.length == 2
+ vtype = infer(env, expr.args[1])
+ end
+
+ # use the operand type to pick op type and check it
+ if expr.args.length == 1
+ optype = UnaryOps[expr.func][vtype]
+ error(expr.loc, "unknown unary operation '#{expr.func}' for operand type #{vtype}") if not optype
+ check_apply(env, expr, optype)
+ elsif expr.args.length == 2
+ infer_binary(env, expr, vtype)
+ else
+ error(expr.loc, "too many operands for operator '#{expr.func}'")
+ end
+ end
+
+ def infer_unary(env, expr, vtype)
+ end
+
+ def infer_binary(env, expr, vtype)
+ optype = BinaryOps[expr.func][vtype]
+ error(expr.loc, "unknown binary operation '#{expr.func}' for operand type #{vtype}") if optype.nil?
+
+ expr.args.each_with_index do |a, i|
+ assign_type(env, a, optype[i])
+ end
+ check_apply(env, expr, optype)
+ end
+end
+
+
+class Parser < BaseParser
+ attr_accessor :exprs
+
+ def toplevel
+ @type_checker = TypeChecker.new(self)
+ @exprs = []
+ while !matches(:eof)
+ decl = declaration()
+ if decl.class == ANF::Let
+ syms[decl.var.name][:value] = decl
+ end
+ @exprs << decl
+ end
+ @exprs = @exprs.compact
+ @exprs.each do |e|
+ @type_checker.infer(syms, e)
+ end
+ pp syms
+ end
+
+ def declaration()
+ if matches(:ident)
+ expr = ident()
+ expr.type = syms[expr.name][:type] if syms[expr.name]
+ if accept("=")
+ value = expression()
+ syms.add_sym(expr.name, expr.loc, :var, expr.type)
+ ANF::Let.new(expr.loc, nil, expr, value, nil)
+ elsif accept(":")
+ expr.type = type_spec()
+ syms.add_sym(expr.name, expr.loc, :var, expr.type)
+ nil
+ else
+ expression()
+ end
+ else
+ expression()
+ end
+ end
+
+ def expression()
+ if matches(:let)
+ let_expr()
+ else
+ complex_expr()
+ end
+ end
+
+ def complex_expr()
+ expr = nil
+ if matches(:if)
+ expr = if_expr()
+ elsif matches(:set)
+ expr = var_set()
+ else
+ expr = atomic_expr()
+ if matches("(")
+ expr = func_call(expr)
+ elsif operator?
+ expr = operator_call(expr)
+ end
+ end
+ expr
+ end
+
+ def atomic_expr()
+ if matches(:func)
+ func_expr()
+ elsif matches(:ident)
+ ident()
+ else
+ constant()
+ end
+ end
+
+
+
+
+ def application()
+# expr = atomic_expr()
+# expr = func_call(expr) if matches("(")
+ # EQ, NEQ, '<', LTEQ, '>', GTEQ
+ end
+
+ def simple_expr()
+ # '+', '-', OR
+ end
+
+ def term()
+ # '*', '/', '%', AND
+ end
+
+ def factor()
+ # '(', NOT, atomic
+ end
+
+
+
+
+
+ OPERATORS = {
+ "+" => true,
+ "-" => true,
+ "*" => true,
+ "/" => true,
+ "%" => true,
+ }
+
+ def operator?
+ OPERATORS[peek().type]
+ end
+
+ def operator_call(expr)
+ op = consume()
+ rhs = atomic_expr()
+ ANF::Apply.new(expr.loc, nil, op.type, [expr, rhs])
+ end
+
+
+
+ def if_expr()
+ loc = expect(:if).pos
+ cond = atomic_expr()
+ expect(:then)
+ branch1 = expression()
+ expect(:else)
+ branch2 = expression()
+ ANF::If.new(loc, nil, cond, branch1, branch2)
+ end
+
+ def var_set()
+ loc = expect(:set).pos
+ name = ident()
+ expect("=")
+ expr = expression()
+ ANF::Set.new(loc, nil, name, expr)
+ end
+
+ def func_call(func)
+ args = []
+ expect("(")
+ while !matches(")")
+ args << atomic_expr()
+ expect(",") if not matches(")")
+ end
+ expect(")")
+ ANF::Apply.new(func.loc, nil, func, args)
+ end
+
+ def func_expr()
+ loc = expect(:func).pos
+ args = []
+ expect("(")
+ while !matches(")")
+ args << ident()
+ expect(",") if not matches(")")
+ end
+ expect(")")
+ body = expression()
+ ANF::Func.new(loc, nil, args, body)
+ end
+
+ def ident()
+ name = expect(:ident)
+ ANF::Var.new(name.pos, nil, name.text.to_sym)
+ end
+
+ def constant()
+ tok = consume()
+ if tok.type == :bool
+ ANF::Const.new(tok.pos, :bool, tok.text == "true")
+ elsif tok.type == :string
+ ANF::Const.new(tok.pos, :string, tok.text)
+ elsif tok.type == :int
+ ANF::Const.new(tok.pos, :int, tok.text.to_i)
+ elsif tok.type == :float
+ ANF::Const.new(tok.pos, :float, tok.text.to_f)
+ elsif tok.type == :void
+ ANF::Const.new(tok.pos, :void, :void)
+ else
+ error("invalid constant")
+ end
+ end
+
+ def let_expr()
+ expect(:let)
+ name = ident()
+ type = type_spec() if accept(":")
+ expect("=")
+ expr = complex_expr()
+ expect(:in)
+ body = expression()
+ ANF::Let.new(name.loc, nil, name, expr, body)
+ end
+
+ def type_spec()
+ type = [ident().name]
+ while accept("->")
+ type << ident().name
+ end
+ (type.length == 1 ? type[0] : type)
+ end
+end
+
+# Driver: build a parser for the sample input and dump its symbol table.
+pp Parser.new("inputs/cerise.m").syms
# byte
# intset
-require 'strscan'
+require 'stringio'
+require_relative "utils/sym_table"
+require_relative "utils/base_lexer"
+require_relative "utils/base_parser"
$debug = false
def error(loc, msg)
if loc[0] == "<input>"
- raise "<input>:0: error: #{msg}"
+ $stderr.puts "<input>:0:#{loc[1]}: error: #{msg}"
else
lines = File.read(loc[0])[0..(loc[1])].split("\n")
$stderr.puts "#{loc[0]}:#{lines.length}: error: #{msg}"
- raise "" if $debug
# $stderr.puts "#{lines.last}"
# $stderr.puts (" " * lines.last.length) + "^"
end
-# exit 1
+ exit 1
end
-class SymTable < Hash
- attr_accessor :type
- attr_accessor :freevars
-
- def initialize(parent = nil)
- @parent = (parent || {})
- @freevars = {}
- end
-
- def clone(type = :block)
- s = SymTable.new(self)
- s.type = type
- s
- end
-
- def [](key)
- (super(key) || @parent[key])
- end
-
- def local(key)
- method(:[]).super_method.call(key)
- end
-
- def defined_ever?(key)
- (not (self[key] || {})[:type].nil?)
- end
-
- def defined_locally?(key)
- (not (local(key) || {})[:type].nil?)
- end
-
- def block_local?(key)
- (not local(key).nil?)
- end
-
- def global?(key)
- if @parent.class == Hash
- block_local?(key)
- elsif block_local? key
- false
- else
- @parent.global? key
- end
- end
-
- def local?(key)
- if @parent.class == Hash
- false
- elsif @type == :func
- block_local? key
- else
- @parent.local? key
- end
- end
-
- def free?(key)
- (defined_ever? key) and (not local? key) and (not global? key)
- end
-
- def merge!(env)
- env.each {|k,v| self[k] = v }
- end
-
- def annotate(key, type)
- self[key] ||= {}
- self[key][:ann] = type
- end
-
- def annotation(key)
- (method(:[]).super_method.call(key) || {})[:ann]
- end
-
- def stack()
- parent = (@parent.is_a?(SymTable) ? @parent.stack : [])
- ([self] + parent).flatten
- end
-end
-
-class Lexer
- Tok = Struct.new(:text, :file, :pos, :type)
+class Lexer < BaseLexer
SPACE = /([ \t\v\n\r]+|#.*\n)/
IDENT = /[_a-zA-Z][_a-zA-Z0-9]*/
BRACES = /[\(\)\[\]\{\}\.]/
"func" => :fun,
}
- attr_accessor :file
- attr_accessor :data
-
- def initialize(path = nil)
- @file = (path || "<input>")
- @text = (path ? File.read(path) : "")
- @data = StringScanner.new(@text)
- end
-
- def parse_string(str)
- @file = "<input>"
- @text = str
- @data = StringScanner.new(@text)
- end
-
- def get_id_type(str)
- ID_TYPES[str] || :ident
- end
-
def next
while @data.skip(SPACE) do
end
if not @data.eos?
type = :eof
if @data.scan(IDENT)
- type = get_id_type(@data.matched)
+ type = (ID_TYPES[@data.matched] || :ident)
elsif @data.scan(FLOATING)
type = :float
elsif @data.scan(INTEGER)
Tok.new("EOF", @file, @data.pos, :eof)
end
end
-
- def linenum(pos = nil)
- @text[0..(pos || @data.pos)].count("\n") + 1
- end
end
-class Parser
+class Parser < BaseParser
LEVELS = {
none: 0,
assign: 1,
Ann = Struct.new(:loc, :type, :expr)
Block = Struct.new(:loc, :type, :exprs)
- def initialize(path = nil)
- parse_file(path)
- end
-
- def parse_string(str)
- @lex = Lexer.new()
- @lex.parse_string(str)
- @prev = nil
- @next = nil
- toplevel
- end
-
- def parse_file(path)
- @lex = Lexer.new(path)
- @prev = nil
- @next = nil
- end
#######################################
# Parsing Rules
expr.br2 = (matches("{") ? block() : expression())
expr
end
-
- #######################################
- # Parsing Primitives
- #######################################
- def error(str, loc = nil)
- file, pos = (loc ? loc : [@lex.file, (@next || @prev).pos])
- raise "#{file}:#{@lex.linenum(pos)}: #{str}"
-# puts "#{file}:#{@lex.linenum(pos)}: #{str}"
-# raise "" if $debug
-# exit 1
- end
-
- def peek()
- @next = @lex.next if @next.nil?
- @next
- end
-
- def matches(type)
- (peek().type == type)
- end
-
- def accept(type)
- if (matches(type))
- @prev = @next
- @next = nil
- true
- else
- false
- end
- end
-
- def expect(type)
- tok = peek()
- if not accept(type)
- error("expected '#{type}', received '#{tok.type}'")
- end
- tok
- end
-
- def consume()
- expect(peek().type)
- end
-
- def eof?
- (peek().type == :eof)
- end
-
- def location()
- if @prev
- [@prev.file, @prev.pos]
- else
- [@lex.file, 0]
- end
- end
end
module TypeChecker
name
end
-
def emit_toplevel(block)
out = StringIO.new
out.puts "void toplevel(void)"
c = (if (123 < 1) 1 else 2)
if (123 < 1) {
- 123
+ 123.0
} else {
321
}
#syms[:a] = {}
#pp syms.global? :a
+tree = Parser.new("inputs/sclpl.src")
tree = Parser.new.parse_string(STRING)
TypeChecker.infer_block(SymTable.new, tree, false)
+pp tree
puts <<-eos
#include <stdarg.h>