From 969de6a59bccd270a91bbf698da7198e342c456f Mon Sep 17 00:00:00 2001
From: uan
Date: Tue, 3 Feb 2026 23:07:33 +0100
Subject: [PATCH] tanta roba, wip scope

---
Review notes (ignored by git am):
- Line breaks, tab indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch; if a hunk is rejected, retry with
  `git apply --ignore-whitespace`.
- The index lines still carry the blob ids of the original patch.

 .gitignore |   2 +-
 lexer.v    |  63 ++++++--
 main.v     |   7 +-
 parser.v   | 460 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 test.one   |  10 +-
 5 files changed, 523 insertions(+), 19 deletions(-)
 create mode 100644 parser.v

diff --git a/.gitignore b/.gitignore
index 9a4f52a..2a3dc53 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
 # Binaries for programs and plugins
 main
-vlexer
+onev
 *.exe
 *.exe~
 *.so
diff --git a/lexer.v b/lexer.v
index 0f9d203..79790fc 100644
--- a/lexer.v
+++ b/lexer.v
@@ -3,9 +3,17 @@ module main
 import term
 
 enum TokenType as u8 {
+	kw_let
+	type
+	kw_if
+	kw_else
+	kw_for
+	kw_break
+	kw_fn
+	kw_return
 	integer
 	real
-	keyword
+	boolean
 	identifier
 	plus
 	minus
@@ -24,7 +32,6 @@ enum TokenType as u8 {
 	comma
 	semicolon
 	colon
-	newline
 	eof
 	unknown
 }
@@ -34,12 +41,19 @@ struct Token {
 	text string
 }
 
-
 fn str_from_toktype(type TokenType) string {
 	return match type {
 		.integer {'integer'}
 		.real {'real'}
-		.keyword {'keyword'}
+		.boolean {'boolean'}
+		.kw_let {'let'}
+		.type {'type'}
+		.kw_if {'if'}
+		.kw_else {'else'}
+		.kw_for {'for'}
+		.kw_break {'break'}
+		.kw_fn {'fn'}
+		.kw_return {'return'}
 		.identifier {'identifier'}
 		.eof {'EOF'}
 		.unknown {'unknown'}
@@ -60,7 +74,6 @@ fn str_from_toktype(type TokenType) string {
 		.comma {'comma'}
 		.semicolon {'semicolon'}
 		.colon {'colon'}
-		.newline {'newline'}
 	}
 }
 
@@ -83,13 +96,29 @@ fn toktype_from_delimiter(delimiter string) TokenType {
 		',' {.comma}
 		';' {.semicolon}
 		':' {.colon}
-		'\n' {.newline}
 		else {.unknown}
 	}
 }
 
-fn is_delimiter(c u8) bool {
-	return " +-*/.,;:%<>()[]{}=\n".contains(c.ascii_str())
+// toktype_from_kw maps a keyword, type name or boolean literal to its token type (.unknown otherwise).
+fn toktype_from_kw(kw string) TokenType {
+	return match kw {
+		'let' {.kw_let}
+		'void', 'real', 'bool', 'int' {.type}
+		'if' {.kw_if}
+		'else' {.kw_else}
+		'for' {.kw_for}
+		'break' {.kw_break}
+		'fn' {.kw_fn}
+		'return' {.kw_return}
+		'true', 'false' {.boolean}
+		else {.unknown}
+	}
+}
+
+// is_delimiter reports whether c ends a token; '.' does not count while inside a number.
+fn is_delimiter(c u8, is_inside_number bool) bool {
+	return " +-*/.,;:%<>()[]{}=\n".contains(c.ascii_str()) && (c.ascii_str() != '.' || !is_inside_number)
 }
 
 fn is_real(str string) bool {
@@ -99,7 +128,7 @@ fn is_real(str string) bool {
 
 fn is_keyword(str string) bool {
 	return [
-		"void", "int", "real", "bool", "if", "else", "for", "break", "fn", "return"
+		"void", "int", "real", "bool", "if", "else", "for", "break", "fn", "return", "let", "true", "false"
 	].contains(str)
 }
 
@@ -118,35 +147,39 @@ fn lex(input string) ?[]Token {
 	mut right := 0
 	mut line := 1
 	mut tokens := []Token{}
+	mut is_inside_number := false
 
 	for (right < input.len && left <= right) {
 		if input[right] == `\n` {
 			line++
 		}
-		if !is_delimiter(input[right]) {
+		if !is_delimiter(input[right], is_inside_number) {
+			// A '.' only belongs to the token when the token starts with a digit.
+			// (u8.str() would yield the byte's decimal code, which is always an int.)
+			is_inside_number = input[left].is_digit()
 			right++
 		}
 		if right >= input.len {
 			break
 		}
-		if is_delimiter(input[right]) && left == right {
-			if input[right] != ` ` {
+		if is_delimiter(input[right], is_inside_number) && left == right {
+			if !input[right].is_space() {
 				tokens << Token{toktype_from_delimiter(input[right].ascii_str()), input[right].ascii_str()}
 			}
 			right++
 			left = right
 		}
-		else if (is_delimiter(input[right]) && left != right) || (right == input.len && left != right) {
+		else if (is_delimiter(input[right], is_inside_number) && left != right) || (right == input.len && left != right) {
 			subs := input.substr(left, right)
 			if is_keyword(subs) {
-				tokens << Token{TokenType.keyword, subs}
+				tokens << Token{toktype_from_kw(subs), subs}
 			} else if subs.is_int() {
 				tokens << Token{TokenType.integer, subs}
 			} else if is_real(subs) {
 				tokens << Token{TokenType.real, subs}
 			} else if subs.is_identifier() {
 				tokens << Token{TokenType.identifier, subs}
-			} else if !subs.is_identifier() && !is_delimiter(input[right-1]) {
+			} else if !subs.is_identifier() && !is_delimiter(input[right-1], is_inside_number) {
 				eprintln(term.red("ERROR: found invalid token " + subs + " at line " + line.str()))
 				return none
 			}
diff --git a/main.v b/main.v
index e8bd6c9..9c413dd 100644
--- a/main.v
+++ b/main.v
@@ -6,5 +6,10 @@ fn main() {
 	content := os.read_file("test.one") or { return }
 	println("---------\n" + content + "---------")
 	tokens := lex(content) or { return }
-	print_toks(tokens)
+	mut parser := Parser{
+		tokens: tokens
+		pos: 0
+	}
+	statements := parser.parse_program()
+	println(statements)
 }
diff --git a/parser.v b/parser.v
new file mode 100644
index 0000000..e4bf438
--- /dev/null
+++ b/parser.v
@@ -0,0 +1,460 @@
+module main
+
+import term
+
+// ------------------------------------------- Symbol Table
+
+// SymbolInfo is either a variable entry or a function entry of the symbol table.
+type SymbolInfo = VarSymbolInfo | FuncSymbolInfo
+
+// VarSymbolInfo holds the declared type name of a variable.
+struct VarSymbolInfo {
+	type string
+}
+
+// FuncSymbolInfo holds the declared return type name of a function.
+struct FuncSymbolInfo {
+	type string
+}
+
+// SymbolTable keeps a stack of variable scopes (innermost last) and one flat map of functions.
+struct SymbolTable {
+mut:
+	variable_scopes []map[string]VarSymbolInfo
+	functions map[string]FuncSymbolInfo
+}
+
+// define_var registers `name` with type `typ` in the innermost scope.
+// It aborts via parse_error if no scope is open or `name` already exists in that scope.
+fn (mut s SymbolTable) define_var(name string, typ string) {
+	$if debug {
+		dump(s.variable_scopes.len)
+	}
+
+	if s.variable_scopes.len == 0 {
+		parse_error('No scope available')
+	}
+
+	if name in s.variable_scopes[s.variable_scopes.len-1] {
+		parse_error('Variable ${name} already defined in this scope')
+	}
+
+	s.variable_scopes[s.variable_scopes.len-1][name] = VarSymbolInfo{type: typ}
+}
+
+// lookup_var searches the scopes from innermost to outermost and returns the first match,
+// or none if `name` is not defined in any open scope.
+fn (mut s SymbolTable) lookup_var(name string) ?VarSymbolInfo {
+	$if debug {
+		dump(s.variable_scopes.len)
+	}
+	if s.variable_scopes.len == 0 {return none}
+	for variables in s.variable_scopes.reverse() {
+		if name in variables {
+			return variables[name]
+		}
+	}
+	return none
+}
+
+// define_func records `typ` as the return type of function `name`, replacing any earlier entry.
+fn (mut s SymbolTable) define_func(name string, typ string) {
+	s.functions[name] = FuncSymbolInfo{type: typ}
+}
+
+// lookup_func returns the entry of function `name`, or none if it was never defined.
+fn (mut s SymbolTable) lookup_func(name string) ?FuncSymbolInfo {
+	if name in s.functions {
+		return s.functions[name]
+	}
+	return none
+}
+
+// is_in_global_scope reports whether exactly one scope is open (parse_program pushes the first one).
+fn (mut s SymbolTable) is_in_global_scope() bool {
+	$if debug {
+		println("scope count: ${s.variable_scopes.len}")
+	}
+	return s.variable_scopes.len == 1
+}
+
+// ------------------------------------------- Expressions
+
+// Expr is the sum type of all expression nodes.
+type Expr = VoidExpr | BinaryExpr | IntegerLiteral | RealLiteral | BoolLiteral | Variable | TypeExpr | Function
+
+// VoidExpr is the value recorded for a bare `return;`.
+struct VoidExpr {}
+
+// BinaryExpr is `left op right`, where op is the operator text ('+', '-', '*', '/' or '=').
+struct BinaryExpr {
+	left Expr
+	op string
+	right Expr
+}
+
+struct IntegerLiteral {
+	val i32
+}
+
+struct RealLiteral {
+	val f32
+}
+
+struct BoolLiteral {
+	val bool
+}
+
+struct Variable {
+	name string
+}
+
+struct TypeExpr {
+	name string
+}
+
+struct Function {
+	name string
+}
+
+// ------------------------------------------- Statements
+
+// Stmt is the sum type of all statement nodes.
+type Stmt = VarDecl | ExprStmt | ReturnStmt | Block | FuncDecl
+
+struct VarDecl {
+	name string
+	value Expr
+	type string
+}
+
+struct FuncDecl {
+	name string
+	ret_type string
+	block Block
+}
+
+struct ExprStmt {
+	expr Expr
+}
+
+struct ReturnStmt {
+	value Expr
+	// type is the type name of value, resolved while the enclosing scope was still open.
+	type string
+}
+
+struct Block {
+	stmts []Stmt
+}
+
+// ------------------------------------------- Parser
+
+// Parser consumes tokens starting at pos; symbols and statements are filled in while parsing.
+struct Parser {
+	tokens []Token
+mut:
+	symbols SymbolTable
+	pos int
+	statements []Stmt
+}
+
+// peek returns the current token without consuming it. No bounds check is done on pos.
+fn (mut p Parser) peek() Token {
+	return p.tokens[p.pos]
+}
+
+// next returns the current token and advances pos by one.
+fn (mut p Parser) next() Token {
+	token := p.tokens[p.pos]
+	p.pos++
+	return token
+}
+
+// expect consumes the current token and aborts via parse_error unless it has the given type.
+fn (mut p Parser) expect(type TokenType) {
+	if p.peek().type != type {
+		parse_error('Expected ${str_from_toktype(type)}, got ${str_from_toktype(p.peek().type)}')
+	}
+	p.next()
+}
+
+// ------------------------------------------- Debug
+
+// dump_token dumps the current (not yet consumed) token in debug builds.
+fn (mut p Parser) dump_token() {
+	$if debug {
+		dump(p.peek())
+	}
+}
+
+// dump_stmt dumps the most recently collected top-level statement in debug builds.
+fn (mut p Parser) dump_stmt() {
+	$if debug {
+		if p.statements.len > 0 {
+			dump(p.statements[p.statements.len-1])
+		}
+	}
+}
+
+// parse_error prints the message in red on stderr and then panics; it never returns.
+@[noreturn]
+fn parse_error(str string) {
+	eprintln(term.red("Parse Error: " + str))
+	panic("")
+}
+
+// ------------------------------------------- Expressions
+
+// parse_primary consumes one token and converts it into a literal, variable, type or function node.
+fn (mut p Parser) parse_primary() Expr {
+	token := p.next()
+
+	p.dump_token()
+
+	return match token.type {
+		.integer {IntegerLiteral{token.text.int()}}
+		.real {RealLiteral{token.text.f32()}}
+		.boolean {BoolLiteral{token.text == 'true'}}
+		.identifier {Variable{token.text}}
+		.type {TypeExpr{token.text}}
+		.kw_fn {Function{token.text}}
+		else {parse_error("Unexpected Token")}
+	}
+}
+
+// parse_expr parses a primary, optionally followed by a binary operator and another expression.
+// There is no precedence handling: the right-hand side is parsed recursively, so `a*b+c` is `a*(b+c)`.
+fn (mut p Parser) parse_expr() Expr {
+	mut left := p.parse_primary()
+
+	match p.peek().type {
+		.plus {return p.parse_binary(left, '+')}
+		.minus {return p.parse_binary(left, '-')}
+		.star {return p.parse_binary(left, '*')}
+		.slash {return p.parse_binary(left, '/')}
+		.equals {return p.parse_binary(left, '=')}
+		else {return left}
+	}
+}
+
+// parse_binary consumes the operator token and parses the right-hand side of a binary expression.
+fn (mut p Parser) parse_binary(left Expr, op string) BinaryExpr {
+	p.next()
+	right := p.parse_expr()
+	return BinaryExpr{left, op, right}
+}
+
+// get_expr_type returns the type name of expr. It aborts via parse_error on mismatched operand
+// types, undefined variables and expression kinds that carry no value type (TypeExpr, Function).
+fn (mut p Parser) get_expr_type(expr Expr) string {
+	return match expr {
+		IntegerLiteral {'int'}
+		RealLiteral {'real'}
+		BoolLiteral {'bool'}
+		VoidExpr {'void'}
+		BinaryExpr {
+			left_t := p.get_expr_type(expr.left)
+			right_t := p.get_expr_type(expr.right)
+			if left_t != right_t {
+				parse_error ('Type mismatch in expression: ${left_t} and ${right_t}')
+			}
+			left_t
+		}
+		Variable {
+			p.dump_stmt()
+			info := p.symbols.lookup_var(expr.name) or {
+				parse_error("Undefined variable ${expr.name}")
+			}
+			return info.type
+		}
+		else {parse_error("Tried getting type of unexpected Expr")}
+	}
+}
+
+// ------------------------------------------- Statements
+
+// get_return_stmts_recursive collects the return statements of block and of the blocks nested in it.
+fn (mut p Parser) get_return_stmts_recursive(block Block) []ReturnStmt {
+	mut returns := []ReturnStmt{}
+	for stmt in block.stmts {
+		if stmt is ReturnStmt {
+			returns << stmt
+		}
+		if stmt is Block {
+			returns << p.get_return_stmts_recursive(stmt)
+		}
+	}
+	return returns
+}
+
+// parse_statement picks the statement parser that matches the current token.
+fn (mut p Parser) parse_statement() Stmt {
+	match p.peek().type {
+		.kw_let {return p.parse_var_decl()}
+		.kw_return {return p.parse_return_stmt()}
+		.kw_fn {return p.parse_func_decl()}
+		.lbracket {return p.parse_block()}
+		else {return p.parse_expr_stmt()}
+	}
+}
+
+// parse_var_decl parses `let <name> <type> = <expr>;`, type-checks the value and defines the variable.
+fn (mut p Parser) parse_var_decl() VarDecl {
+	p.expect(.kw_let)
+
+	name_tok := p.next()
+	if name_tok.type != .identifier {
+		parse_error("Expected variable name after let")
+	}
+
+	type_tok := p.next()
+	if type_tok.type != .type {
+		parse_error("Expected variable type after name")
+	}
+
+	p.expect(.equals)
+	val := p.parse_expr()
+
+	if type_tok.text == 'void' {
+		parse_error("Cannot declare a variable of type void")
+	}
+	if p.get_expr_type(val) != type_tok.text {
+		parse_error("Mismatch between declared type (${type_tok.text}) and actual type (${p.get_expr_type(val)})")
+	}
+	p.expect(.semicolon)
+
+	p.symbols.define_var(name_tok.text, type_tok.text)
+
+	return VarDecl {
+		name: name_tok.text
+		value: val
+		type: type_tok.text
+	}
+}
+
+// parse_func_decl parses `fn <name>() <type>` followed by a block (parameters are not parsed yet),
+// checks every return statement against the declared type and registers the function.
+fn (mut p Parser) parse_func_decl() FuncDecl {
+
+	if !p.symbols.is_in_global_scope() {parse_error("Tried to define a function in a non-global scope")}
+
+	p.expect(.kw_fn)
+
+	name_tok := p.next()
+	if name_tok.type != .identifier {
+		parse_error("Expected function name after fn")
+	}
+
+	p.expect(.lparen)
+	p.expect(.rparen)
+
+	type_tok := p.next()
+	if type_tok.type != .type {
+		parse_error("Expected return type after parameter list")
+	}
+
+	block := p.parse_block()
+
+	return_stmts := p.get_return_stmts_recursive(block)
+
+	for return_stmt in return_stmts {
+		// Compare the type recorded when the return was parsed: the block's scope has been
+		// popped by now, so get_expr_type could no longer resolve local variables.
+		if return_stmt.type != type_tok.text {
+			parse_error("Mismatch between declared return type (${type_tok.text}) \
+			and actual return type (${return_stmt.type})")
+		}
+	}
+
+	p.symbols.define_func(name_tok.text, type_tok.text)
+
+	return FuncDecl {
+		name: name_tok.text
+		ret_type: type_tok.text
+		block: block
+	}
+}
+
+// parse_return_stmt parses `return;` or `return <literal or identifier>;` and records the value type.
+fn (mut p Parser) parse_return_stmt() ReturnStmt {
+	p.expect(.kw_return)
+
+	token := p.peek()
+	p.dump_token()
+	mut expr := Expr{}
+	expr = match token.type {
+		.integer {IntegerLiteral{token.text.int()}}
+		.real {RealLiteral{token.text.f32()}}
+		.boolean {BoolLiteral{token.text == 'true'}}
+		.identifier {Variable{token.text}}
+		.semicolon {VoidExpr{}}
+		else {parse_error("Unexpected Token")}
+	}
+	p.next()
+	if !(expr is VoidExpr) {
+		p.expect(.semicolon)
+	}
+	return ReturnStmt {
+		value: expr
+		type: p.get_expr_type(expr)
+	}
+}
+
+// parse_expr_stmt parses an expression followed by a semicolon.
+fn (mut p Parser) parse_expr_stmt() ExprStmt {
+	expr := p.parse_expr()
+	p.expect(.semicolon)
+
+	return ExprStmt {
+		expr: expr
+	}
+}
+
+// parse_block parses the statements between lbracket and rbracket tokens in a fresh variable scope
+// and rejects a return statement that is not the last statement of the block.
+fn (mut p Parser) parse_block() Block {
+	p.expect(.lbracket)
+
+	mut statements := []Stmt{}
+
+	p.symbols.variable_scopes << map[string]VarSymbolInfo{}
+	$if debug {
+		println("entering scope")
+	}
+
+	for p.peek().type != .rbracket && p.peek().type != .eof {
+		statements << p.parse_statement()
+	}
+
+	p.expect(.rbracket)
+
+
+	return_stmts := (statements.filter(it is ReturnStmt).map(it as ReturnStmt))
+	if return_stmts.len > 0 && (return_stmts.len > 1 || Stmt(return_stmts[0]) != statements[statements.len - 1]) {
+		parse_error("Unexpected use of return. Unreachable code")
+	}
+
+	p.symbols.variable_scopes.delete_last()
+
+	$if debug {
+		println("exiting scope")
+	}
+
+	return Block {
+		stmts: statements
+	}
+
+}
+
+// parse_program parses statements until the eof token inside the global scope and returns them.
+fn (mut p Parser) parse_program() []Stmt {
+	p.symbols.variable_scopes << map[string]VarSymbolInfo{}
+	for p.peek().type != .eof {
+		p.statements << p.parse_statement()
+	}
+	p.symbols.variable_scopes.delete_last()
+	return p.statements
+}
+
+
diff --git a/test.one b/test.one
index 0a36409..7a5d334 100644
--- a/test.one
+++ b/test.one
@@ -1,3 +1,9 @@
-fn bool singledigit(int x) {
-	return x < 10;
+fn foo() int {
+	let y int = 2;
+	let z int = y*2;
+	return y;
+}
+
+fn bar() real {
+	return 5.5;
 }