Files
one/parser.v
2026-02-05 13:18:40 +01:00

684 lines
14 KiB
V

module main
import term
// ------------------------------------------- Precedence
// Binding strength of operators, declared from weakest to strongest.
// The declaration order is significant: parse_expr casts these values to int
// and compares them to decide whether to keep extending an expression.
enum Precedence {
lowest
assignment // =, +=, -=, *=, /=
comparison // ==, !=, <=, >= (NOTE(review): get_precedence maps no token for plain `<` / `>` — confirm)
sum // +, -
product // *, /
prefix // -x, !x (never returned by get_precedence in this file)
call // function() (never returned by get_precedence in this file)
}
// get_precedence maps an operator token type to its binding strength.
// Any token type that is not listed yields `.lowest`, which makes
// parse_expr stop extending the expression it is building.
fn (p Parser) get_precedence(tok_type TokenType) Precedence {
	match tok_type {
		.equals, .plus_eq, .minus_eq, .star_eq, .slash_eq {
			return Precedence.assignment
		}
		.eq_eq, .not_eq, .less_eq, .greater_eq {
			return Precedence.comparison
		}
		.plus, .minus {
			return Precedence.sum
		}
		.star, .slash {
			return Precedence.product
		}
		else {
			return Precedence.lowest
		}
	}
}
// ------------------------------------------- Symbol Table
// Sum type over the three kinds of symbol-table entries declared below.
// NOTE(review): nothing in the visible part of this file uses this type — confirm it is still needed.
type SymbolInfo = VarSymbolInfo | FuncSymbolInfo | StructTypeSymbolInfo
// Symbol-table entry for a variable.
struct VarSymbolInfo {
// Type name, stored exactly as it was passed to SymbolTable.define_var.
type string
}
// Symbol-table entry for a function.
struct FuncSymbolInfo {
// Type name reported by get_expr_type for a call to this function
// (parse_func_decl stores the declared return type here).
type string
// Parsed body of the function.
block Block
}
// Symbol-table entry for a user-declared struct type.
struct StructTypeSymbolInfo {
// Name of the struct type; it is also the key under which the entry is stored.
name string
}
// Holds every name known to the parser: scoped variables plus the
// flat (unscoped) tables of functions and struct types.
struct SymbolTable {
mut:
// Stack of variable scopes; the last element is the innermost scope.
variable_scopes []map[string]VarSymbolInfo
// Functions by name.
functions map[string]FuncSymbolInfo
// Struct types by name.
structs map[string]StructTypeSymbolInfo
}
// define_var registers variable `name` with type `typ` in the innermost scope.
// Aborts through parse_error when no scope is open or when the name is
// already taken in that same scope (outer scopes may be shadowed).
fn (mut s SymbolTable) define_var(name string, typ string) {
	$if debug {
		dump(s.variable_scopes.len)
	}
	if s.variable_scopes.len == 0 {
		parse_error('No scope available')
	}
	innermost := s.variable_scopes.len - 1
	if name in s.variable_scopes[innermost] {
		parse_error('Variable ${name} already defined in this scope')
	}
	s.variable_scopes[innermost][name] = VarSymbolInfo{
		type: typ
	}
}
// lookup_var resolves `name` to its variable entry, or `none` when it is
// not declared in any open scope.
fn (mut s SymbolTable) lookup_var(name string) ?VarSymbolInfo {
	$if debug {
		dump(s.variable_scopes.len)
	}
	// Walk from the innermost scope outwards so inner declarations win.
	for depth := s.variable_scopes.len - 1; depth >= 0; depth-- {
		if name in s.variable_scopes[depth] {
			return s.variable_scopes[depth][name]
		}
	}
	return none
}
// define_func stores (or overwrites) the entry for function `name`,
// recording its type name `typ` and its body `block`.
fn (mut s SymbolTable) define_func(name string, typ string, block Block) {
	info := FuncSymbolInfo{
		type: typ
		block: block
	}
	s.functions[name] = info
}
// lookup_func returns the entry for function `name`, or `none` if no such
// function has been defined.
fn (mut s SymbolTable) lookup_func(name string) ?FuncSymbolInfo {
	if name !in s.functions {
		return none
	}
	return s.functions[name]
}
// define_struct_type stores (or overwrites) the entry for struct type `name`.
fn (mut s SymbolTable) define_struct_type(name string) {
	entry := StructTypeSymbolInfo{
		name: name
	}
	s.structs[name] = entry
}
// lookup_struct_type returns the entry for struct type `name`, or `none`
// if no struct with that name has been declared.
fn (mut s SymbolTable) lookup_struct_type(name string) ?StructTypeSymbolInfo {
	if name !in s.structs {
		return none
	}
	return s.structs[name]
}
// is_in_global_scope reports whether exactly one variable scope is open,
// i.e. only the scope that parse_program pushes for the whole program.
fn (mut s SymbolTable) is_in_global_scope() bool {
	open_scopes := s.variable_scopes.len
	return open_scopes == 1
}
// ------------------------------------------- Expressions
// Sum type over every expression node the parser can produce.
type Expr = VoidExpr | UnaryExpr | BinaryExpr | IntegerLiteral | RealLiteral | BoolLiteral | Variable | TypeExpr | Function | TypeCast | ParenExpr | PrintExpr | FnCall | StructMember | StructInstantiation
// Expression without a value; get_expr_type reports its type as 'void'.
struct VoidExpr {}
// Operator applied to a single identifier. parse_ident builds it when an
// identifier is directly followed by an `.increment` or `.decrement` token.
struct UnaryExpr {
// Name of the identifier the operator applies to.
ident string
// Source text of the operator token.
op string
}
// Infix operation `left op right`.
// Field order matters: parse_binary initialises this struct positionally.
struct BinaryExpr {
left Expr
// Source text of the operator token.
op string
right Expr
}
// Integer literal; get_expr_type reports its type as 'int'.
struct IntegerLiteral {
val i32
}
// Floating-point literal; get_expr_type reports its type as 'real'.
struct RealLiteral {
val f32
}
// Boolean literal; parse_primary sets `val` to true only for the token text 'true'.
struct BoolLiteral {
val bool
}
// Reference to a variable by name. The name is resolved against the
// symbol table only when get_expr_type is asked for its type.
struct Variable {
name string
}
// A type name used in expression position without a following `(`
// (with a `(` parse_type produces a TypeCast instead).
struct TypeExpr {
name string
}
// One `name type;` member of a struct declaration.
struct StructMember {
// Member name.
name string
// Source text of the member's type token.
type string
}
// Construction of a struct value from a comma-separated list of expressions.
struct StructInstantiation {
// Name of the struct type being instantiated.
name string
// Member initialisers in the order they were written.
member_values []Expr
}
// Node produced by parse_primary for a `.kw_fn` token in expression position.
struct Function {
// Source text of that token.
name string
}
// Cast written as `type(expr)`; get_expr_type reports `type` as its result type.
struct TypeCast {
// Name of the target type.
type string
// Expression being cast.
expr Expr
}
// Parenthesised expression; its type is the type of the inner expression.
struct ParenExpr {
expr Expr
}
// Print call with a single argument.
struct PrintExpr {
// Expression to print.
expr Expr
// Type name of `expr`, computed by get_expr_type when the node is parsed.
type string
}
// Call of a named function.
struct FnCall {
// Name of the callee.
name string
// Argument expressions in call order.
args []Expr
}
// ------------------------------------------- Statements
// Sum type over every statement node the parser can produce.
type Stmt = VarDecl | ExprStmt | ReturnStmt | Block | FuncDecl | Param | StructDecl
// Variable declaration with a mandatory initialiser.
struct VarDecl {
// Declared variable name.
name string
// Declared type; struct types are recorded as 'struct <Name>' by parse_var_decl.
type string
// Initialiser expression.
value Expr
// True when parse_var_decl was called with is_const = true.
const bool
}
// Function declaration.
struct FuncDecl {
// Function name.
name string
// Parameters in declaration order.
params []Param
// Source text of the declared return type token.
ret_type string
// Function body.
block Block
}
// Struct type declaration.
struct StructDecl {
// Name of the struct type.
name string
// Members in declaration order.
members []StructMember
}
// An expression used as a statement (terminated by a semicolon in the source).
struct ExprStmt {
expr Expr
}
// Return statement carrying the returned expression.
struct ReturnStmt {
expr Expr
}
// Sequence of statements enclosed by `.lbracket` / `.rbracket` tokens.
struct Block {
stmts []Stmt
}
// One function parameter.
// Field order matters: parse_func_decl initialises this struct positionally.
struct Param {
// Parameter name.
name string
// Source text of the parameter's type token.
type string
}
// ------------------------------------------- Parser
// Parser state: the immutable token stream plus a cursor, the symbol
// table, and the top-level statements parsed so far.
struct Parser {
// Token stream to parse. parse_program loops until it sees an `.eof` token.
tokens []Token
mut:
// Known variables, functions and struct types.
symbols SymbolTable
// Index into `tokens` of the next token to be consumed.
pos int
// NOTE(review): not read or written in the visible part of this file — presumably a source line counter.
line int
// Top-level statements collected by parse_program.
statements []Stmt
// When true, parse_ident treats identifiers as struct member declarations.
// NOTE(review): never set to true in the visible part of this file.
inside_struct bool
}
// peek returns the token at the cursor without consuming it.
// No range check is done here; `p.pos` must be a valid index into `p.tokens`.
fn (mut p Parser) peek() Token {
	current := p.tokens[p.pos]
	return current
}
// next returns the token at the cursor and advances the cursor by one.
fn (mut p Parser) next() Token {
	index := p.pos
	consumed := p.tokens[index]
	p.pos = index + 1
	return consumed
}
// expect consumes the current token if it has the given type and aborts
// with a parse error naming both the expected and the actual type otherwise.
fn (mut p Parser) expect(type TokenType) {
	actual := p.peek().type
	if actual != type {
		parse_error('Expected ${str_from_toktype(type)}, got ${str_from_toktype(actual)}')
	}
	p.next()
}
// ------------------------------------------- Debug
// dump_token prints the token at the cursor (the next one to be consumed).
// It compiles to a no-op unless the program is built with the `debug` flag.
fn (mut p Parser) dump_token() {
$if debug {
dump(p.peek())
}
}
// dump_stmt prints the most recently collected top-level statement, if any.
// It compiles to a no-op unless the program is built with the `debug` flag.
fn (mut p Parser) dump_stmt() {
	$if debug {
		if p.statements.len == 0 {
			return
		}
		dump(p.statements[p.statements.len-1])
	}
}
// parse_error writes `str` to stderr in red, prefixed with "Parse Error: ",
// and then terminates the program by panicking. It never returns.
@[noreturn]
fn parse_error(str string) {
	message := term.red("Parse Error: ${str}")
	eprintln(message)
	panic("")
}
// ------------------------------------------- Expression Parsing
// parse_primary consumes one token and builds the expression that starts
// with it: a literal, or whatever the matching sub-parser produces for
// identifiers, type names, parentheses and print calls.
// Any other token type is a parse error.
fn (mut p Parser) parse_primary() Expr {
	first := p.next()
	// In debug builds this shows the token that FOLLOWS the consumed one.
	p.dump_token()
	return match first.type {
		.integer {
			IntegerLiteral{
				val: first.text.int()
			}
		}
		.real {
			RealLiteral{
				val: first.text.f32()
			}
		}
		.boolean {
			BoolLiteral{
				val: first.text == 'true'
			}
		}
		.kw_fn {
			Function{
				name: first.text
			}
		}
		.identifier {
			p.parse_ident(first.text)
		}
		.type {
			p.parse_type(first.text)
		}
		.lparen {
			p.parse_paren()
		}
		.kw_print {
			p.parse_print()
		}
		else {
			parse_error("Unexpected Token")
		}
	}
}
// parse_expr parses an expression by precedence climbing: it reads one
// primary expression and then keeps folding in binary operators for as
// long as the upcoming operator binds strictly tighter than `prec`.
fn (mut p Parser) parse_expr(prec Precedence) Expr {
	mut result := p.parse_primary()
	for {
		upcoming := p.get_precedence(p.peek().type)
		if int(upcoming) <= int(prec) {
			break
		}
		op_tok := p.next()
		result = p.parse_binary(result, op_tok.text, upcoming)
	}
	return result
}
// parse_ident decides what an already-consumed identifier means:
//   * a known struct type name -> struct instantiation (when `.lbracket`
//     follows) or a type expression / cast named 'struct <ident>';
//   * inside a struct declaration -> a struct member;
//   * followed by `.increment` / `.decrement` -> a unary expression;
//   * followed by `.lparen` -> a function call;
//   * anything else -> a plain variable reference.
fn (mut p Parser) parse_ident(ident string) Expr {
	if p.symbols.lookup_struct_type(ident) != none {
		if p.peek().type == .lbracket {
			return p.parse_struct_inst(ident)
		}
		return p.parse_type('struct ${ident}')
	}
	if p.inside_struct {
		return p.parse_struct_member(ident)
	}
	following := p.peek().type
	if following == .increment || following == .decrement {
		return UnaryExpr{
			ident: ident
			op: p.next().text
		}
	}
	if following == .lparen {
		return p.parse_call(ident)
	}
	return Variable{
		name: ident
	}
}
// parse_struct_member parses one `name type;` member of a struct declaration.
//
// The cursor must be ON the member's identifier token: this function
// consumes the identifier, the type token and the closing semicolon.
// `name` is the member name the caller already read from that identifier.
// A missing type is reported through parse_error.
fn (mut p Parser) parse_struct_member(name string) StructMember {
	p.expect(.identifier)
	if p.peek().type != .type {
		// Keep the raw token dump out of regular builds, like every other
		// diagnostic dump in this parser.
		$if debug {
			dump(p.peek())
		}
		parse_error("Expected type after struct member ${name} in declaration, got ${p.peek().type}")
	}
	type := p.peek().text
	p.expect(.type)
	p.expect(.semicolon)
	return StructMember{name: name, type: type}
}
// parse_struct_inst parses the bracketed, comma-separated initialiser list
// that follows a struct type name and returns the instantiation node.
// An empty list is allowed.
fn (mut p Parser) parse_struct_inst(name string) StructInstantiation {
	p.expect(.lbracket)
	mut values := []Expr{}
	if p.peek().type != .rbracket {
		values << p.parse_expr(.lowest)
		// Every comma must be followed by another initialiser.
		for p.peek().type == .comma {
			p.next()
			values << p.parse_expr(.lowest)
		}
	}
	p.expect(.rbracket)
	return StructInstantiation{
		name: name
		member_values: values
	}
}
// parse_call parses the parenthesised, comma-separated argument list that
// follows a function name and returns the call node.
// An empty argument list is allowed.
fn (mut p Parser) parse_call(name string) FnCall {
	p.expect(.lparen)
	mut arguments := []Expr{}
	if p.peek().type != .rparen {
		arguments << p.parse_expr(.lowest)
		// Every comma must be followed by another argument.
		for p.peek().type == .comma {
			p.next()
			arguments << p.parse_expr(.lowest)
		}
	}
	p.expect(.rparen)
	return FnCall{
		name: name
		args: arguments
	}
}
// parse_print parses `( expr )` after a print keyword that the caller has
// already consumed, and records the type of the printed expression.
fn (mut p Parser) parse_print() PrintExpr {
	p.expect(.lparen)
	argument := p.parse_expr(.lowest)
	p.expect(.rparen)
	argument_type := p.get_expr_type(argument)
	return PrintExpr{
		expr: argument
		type: argument_type
	}
}
// parse_binary builds the node for `left op <right>`.
//
// The operator token has already been consumed by the caller. The right
// operand is parsed with the operator's own precedence `prec`, so only
// tighter-binding operators are folded into it.
// Aborts through parse_error when `op` is not legal for the type of `left`
// or when both operands do not have the same type.
fn (mut p Parser) parse_binary(left Expr, op string, prec Precedence) BinaryExpr {
	right := p.parse_expr(prec)
	// Resolve the left type once and reuse it for the check and the message.
	left_type := p.get_expr_type(left)
	if !p.is_op_valid_for_type(left_type, op) {
		parse_error("Illegal operation ${op} for type ${left_type}")
	}
	binary_expr := BinaryExpr{left, op, right}
	p.check_binary_expr_types(binary_expr)
	return binary_expr
}
// parse_type handles a type name in expression position. When a `(`
// follows, it is a cast `type(expr)`; otherwise it is a bare type expression.
fn (mut p Parser) parse_type(type string) Expr {
	if p.peek().type != .lparen {
		return TypeExpr{
			name: type
		}
	}
	p.next()
	operand := p.parse_expr(.lowest)
	p.expect(.rparen)
	return TypeCast{
		type: type
		expr: operand
	}
}
// parse_paren parses the rest of a parenthesised expression; the opening
// `(` has already been consumed by the caller.
fn (mut p Parser) parse_paren() ParenExpr {
	inner := p.parse_expr(.lowest)
	p.expect(.rparen)
	return ParenExpr{inner}
}
// check_binary_expr_types aborts with a parse error unless both operands
// of `expr` have exactly the same type name.
fn (mut p Parser) check_binary_expr_types(expr BinaryExpr) {
	lhs_type := p.get_expr_type(expr.left)
	rhs_type := p.get_expr_type(expr.right)
	if lhs_type == rhs_type {
		return
	}
	parse_error('Type mismatch in expression: ${lhs_type} and ${rhs_type}')
}
// get_expr_type returns the type name of `expr`.
//
// Literals map to 'int', 'real' and 'bool'. A binary expression is first
// checked for matching operand types; comparison operators then yield
// 'bool' and every other operator yields the type of the left operand.
// Variables and function calls are resolved through the symbol table, and
// an unknown name aborts through parse_error.
// NOTE(review): expression kinds without a case below fall into the `else`
// branch, which returns a diagnostic sentence as if it were a type name
// instead of raising an error.
fn (mut p Parser) get_expr_type(expr Expr) string {
	return match expr {
		ParenExpr { p.get_expr_type(expr.expr) }
		IntegerLiteral { 'int' }
		RealLiteral { 'real' }
		BoolLiteral { 'bool' }
		VoidExpr { 'void' }
		BinaryExpr {
			p.check_binary_expr_types(expr)
			left_t := p.get_expr_type(expr.left)
			// Every comparison produces a bool, including plain `<` and `>`.
			if expr.op in ['==', '!=', '<', '>', '<=', '>='] {
				'bool'
			} else {
				left_t
			}
		}
		Variable {
			p.dump_stmt()
			info := p.symbols.lookup_var(expr.name) or {
				parse_error("Undefined variable ${expr.name}")
			}
			return info.type
		}
		TypeCast { expr.type }
		FnCall {
			fninfo := p.symbols.lookup_func(expr.name) or {
				parse_error("Tried to call undefined function ${expr.name}")
			}
			fninfo.type
		}
		StructInstantiation { expr.name }
		else { "Tried getting type of unexpected Expr" }
	}
}
// is_op_valid_for_type reports whether operator `op` may be applied to a
// left operand of type `type`. Assignment and (in)equality are legal for
// every type; arithmetic, ordering, increment/decrement and compound
// assignment are legal for 'int' and 'real' only.
fn (mut p Parser) is_op_valid_for_type(type string, op string) bool {
	// Operators accepted regardless of the operand type.
	if op in ['=', '==', '!='] {
		return true
	}
	return match type {
		'int', 'real' {
			op in ['+', '-', '*', '/', '<', '>', '<=', '>=', '++', '--', '+=', '-=', '*=', '/=']
		}
		else {
			false
		}
	}
}
// ------------------------------------------- Statement Parsing
// get_return_stmts_recursive collects, in source order, every return
// statement that sits directly in `block` or in a block nested inside it.
fn (mut p Parser) get_return_stmts_recursive(block Block) []ReturnStmt {
	mut found := []ReturnStmt{}
	for stmt in block.stmts {
		match stmt {
			ReturnStmt {
				found << stmt
			}
			Block {
				found << p.get_return_stmts_recursive(stmt)
			}
			else {}
		}
	}
	return found
}
// parse_statement looks at the current token (without consuming it) and
// dispatches to the matching statement parser. Anything that does not
// start with a statement keyword or an opening bracket is parsed as an
// expression statement.
fn (mut p Parser) parse_statement() Stmt {
	leading := p.peek().type
	if leading == .kw_let {
		return p.parse_var_decl(false)
	}
	if leading == .kw_const {
		return p.parse_var_decl(true)
	}
	if leading == .kw_return {
		return p.parse_return_stmt()
	}
	if leading == .kw_fn {
		return p.parse_func_decl()
	}
	if leading == .lbracket {
		return p.parse_block(false)
	}
	if leading == .kw_struct {
		return p.parse_struct()
	}
	return p.parse_expr_stmt()
}
// parse_var_decl parses `<keyword> name type = expr;` and registers the
// variable in the innermost scope.
//
// `is_const` is copied into the resulting node. The cursor must be on the
// declaration keyword, which is consumed without being checked.
// Aborts through parse_error when the name or type is missing, when the
// type is an unknown identifier, when the type is 'void', or when the
// initialiser's type differs from the declared one.
fn (mut p Parser) parse_var_decl(is_const bool) VarDecl {
	// Keep the keyword so the error below names the one the user actually
	// wrote instead of always saying "let".
	keyword_tok := p.next()
	name_tok := p.next()
	if name_tok.type != .identifier {
		parse_error("Expected variable name after ${keyword_tok.text}")
	}
	type_tok := p.next()
	type_name := match type_tok.type {
		.type {
			type_tok.text
		}
		.identifier {
			// An identifier is only a valid type if it names a declared struct.
			if p.symbols.lookup_struct_type(type_tok.text) == none {
				parse_error("Expected variable type after name when declaring ${name_tok.text}")
			}
			'struct ${type_tok.text}'
		}
		else {
			parse_error("Expected variable type after name when declaring ${name_tok.text}")
		}
	}
	p.expect(.equals)
	val := p.parse_expr(.lowest)
	if type_tok.text == 'void' {
		parse_error("Cannot declare a variable of type void")
	}
	// The comparison uses the raw type token text (without the 'struct '
	// prefix) because that is what get_expr_type reports for struct values.
	val_type := p.get_expr_type(val)
	if val_type != type_tok.text {
		parse_error("Mismatch between declared type (${type_tok.text}) and actual type (${val_type})")
	}
	p.expect(.semicolon)
	p.symbols.define_var(name_tok.text, type_tok.text)
	return VarDecl{
		name: name_tok.text
		value: val
		type: type_name
		const: is_const
	}
}
// parse_func_decl parses `fn name(p1 type1, p2 type2) ret_type { ... }`.
//
// Functions may only be declared in the global scope. The parameters live
// in a fresh variable scope that the body shares (the body block does not
// open a scope of its own). After the body is parsed, the type of every
// return statement in it is checked against the declared return type.
// All violations abort through parse_error.
fn (mut p Parser) parse_func_decl() FuncDecl {
	if !p.symbols.is_in_global_scope() {
		parse_error("Tried to define a function in a non-global scope")
	}
	p.expect(.kw_fn)
	name_tok := p.next()
	if name_tok.type != .identifier {
		parse_error("Expected function name after fn")
	}
	p.expect(.lparen)
	mut params := []Param{}
	// Scope shared by the parameters and the function body.
	p.symbols.variable_scopes << map[string]VarSymbolInfo{}
	if p.peek().type != .rparen {
		param_syntax_msg := "Invalid syntax to declare function arguments! use f(myint int, myreal real)"
		for {
			if p.peek().type != .identifier {
				parse_error(param_syntax_msg)
			}
			p_name := p.next().text
			if p.peek().type != .type {
				parse_error(param_syntax_msg)
			}
			p_type := p.next().text
			params << Param{p_name, p_type}
			p.symbols.define_var(p_name, p_type)
			if p.peek().type == .comma {
				p.next()
			} else {
				break
			}
		}
	}
	p.expect(.rparen)
	p.dump_token()
	type_tok := p.next()
	if type_tok.type != .type {
		parse_error("Expected function return type after name when declaring ${name_tok.text}")
	}
	// Register the signature before parsing the body so that a recursive
	// call inside the body can be resolved; the entry is replaced with the
	// real body below.
	p.symbols.define_func(name_tok.text, type_tok.text, Block{})
	block := p.parse_block(true)
	for return_stmt in p.get_return_stmts_recursive(block) {
		actual_type := p.get_expr_type(return_stmt.expr)
		if actual_type != type_tok.text {
			parse_error("Mismatch between declared return type (${type_tok.text}) and actual return type (${actual_type})")
		}
	}
	p.symbols.variable_scopes.delete_last()
	p.symbols.define_func(name_tok.text, type_tok.text, block)
	return FuncDecl{
		name: name_tok.text
		ret_type: type_tok.text
		block: block
		params: params
	}
}
// parse_struct parses `struct Name { member type; ... }` and registers the
// struct type once the closing bracket has been consumed.
fn (mut p Parser) parse_struct() StructDecl {
	p.expect(.kw_struct)
	// Read the name before expect() moves past the identifier.
	struct_name := p.peek().text
	p.expect(.identifier)
	p.expect(.lbracket)
	mut fields := []StructMember{}
	for {
		if p.peek().type != .identifier {
			break
		}
		member_name := p.peek().text
		fields << p.parse_struct_member(member_name)
	}
	p.expect(.rbracket)
	p.symbols.define_struct_type(struct_name)
	return StructDecl{
		name: struct_name
		members: fields
	}
}
// parse_return_stmt parses `return expr;` or a bare `return;`.
//
// A bare return carries a VoidExpr, whose type get_expr_type reports as
// 'void'. The statement must end with a semicolon; any other token is a
// parse error instead of being silently skipped.
fn (mut p Parser) parse_return_stmt() ReturnStmt {
	p.expect(.kw_return)
	if p.peek().type == .semicolon {
		p.next()
		return ReturnStmt{
			expr: VoidExpr{}
		}
	}
	expr := p.parse_expr(.lowest)
	p.expect(.semicolon)
	return ReturnStmt{
		expr: expr
	}
}
// parse_expr_stmt parses an expression followed by a mandatory semicolon.
fn (mut p Parser) parse_expr_stmt() ExprStmt {
	stmt := ExprStmt{
		expr: p.parse_expr(.lowest)
	}
	p.expect(.semicolon)
	return stmt
}
// parse_block parses a bracket-delimited list of statements.
//
// Unless `no_scope` is true, the block gets its own variable scope, which
// is dropped again before returning. (parse_func_decl passes true because
// it manages the scope shared by parameters and body itself.)
// A block may contain at most one direct return statement, and it has to
// be the last statement; returns inside nested blocks are not looked at here.
fn (mut p Parser) parse_block(no_scope bool) Block {
	opens_scope := !no_scope
	p.expect(.lbracket)
	mut body := []Stmt{}
	if opens_scope {
		p.symbols.variable_scopes << map[string]VarSymbolInfo{}
	}
	$if debug {
		println("entering scope")
	}
	for {
		upcoming := p.peek().type
		if upcoming == .rbracket || upcoming == .eof {
			break
		}
		body << p.parse_statement()
	}
	p.expect(.rbracket)
	direct_returns := body.filter(it is ReturnStmt).map(it as ReturnStmt)
	if direct_returns.len > 1 {
		parse_error("Unexpected use of return. Unreachable code")
	}
	if direct_returns.len == 1 && Stmt(direct_returns[0]) != body[body.len - 1] {
		parse_error("Unexpected use of return. Unreachable code")
	}
	if opens_scope {
		p.symbols.variable_scopes.delete_last()
	}
	$if debug {
		println("exiting scope")
	}
	return Block{
		stmts: body
	}
}
// parse_program parses the whole token stream up to the `.eof` token and
// returns the collected top-level statements. The global variable scope
// exists only for the duration of this call.
fn (mut p Parser) parse_program() []Stmt {
	p.symbols.variable_scopes << map[string]VarSymbolInfo{}
	for {
		if p.peek().type == .eof {
			break
		}
		// Append only after the statement is complete: dump_stmt reads the
		// last element of p.statements while parsing is in progress.
		parsed := p.parse_statement()
		p.statements << parsed
	}
	p.symbols.variable_scopes.delete_last()
	$if debug {
		dump(p.symbols.functions)
	}
	return p.statements
}