tanta roba, wip scope

This commit is contained in:
uan
2026-02-03 23:07:33 +01:00
parent fbae7bc5d5
commit 969de6a59b
5 changed files with 474 additions and 19 deletions

2
.gitignore vendored
View File

@@ -1,6 +1,6 @@
# Binaries for programs and plugins
main
vlexer
onev
*.exe
*.exe~
*.so

59
lexer.v
View File

@@ -3,9 +3,17 @@ module main
import term
enum TokenType as u8 {
kw_let
type
kw_if
kw_else
kw_for
kw_break
kw_fn
kw_return
integer
real
keyword
boolean
identifier
plus
minus
@@ -24,7 +32,6 @@ enum TokenType as u8 {
comma
semicolon
colon
newline
eof
unknown
}
@@ -34,12 +41,19 @@ struct Token {
text string
}
fn str_from_toktype(type TokenType) string {
return match type {
.integer {'integer'}
.real {'real'}
.keyword {'keyword'}
.boolean {'boolean'}
.kw_let {'let'}
.type {'type'}
.kw_if {'if'}
.kw_else {'else'}
.kw_for {'for'}
.kw_break {'break'}
.kw_fn {'break'}
.kw_return {'return'}
.identifier {'identifier'}
.eof {'EOF'}
.unknown {'unknown'}
@@ -60,7 +74,6 @@ fn str_from_toktype(type TokenType) string {
.comma {'comma'}
.semicolon {'semicolon'}
.colon {'colon'}
.newline {'newline'}
}
}
@@ -83,13 +96,27 @@ fn toktype_from_delimiter(delimiter string) TokenType {
',' {.comma}
';' {.semicolon}
':' {.colon}
'\n' {.newline}
else {.unknown}
}
}
fn is_delimiter(c u8) bool {
return " +-*/.,;:%<>()[]{}=\n".contains(c.ascii_str())
// toktype_from_kw maps the text of a reserved word to its token type.
// All built-in type names share `.type` and both boolean literals share
// `.boolean`; any other text yields `.unknown`.
fn toktype_from_kw(kw string) TokenType {
	if kw in ['void', 'real', 'bool', 'int'] {
		return .type
	}
	if kw in ['true', 'false'] {
		return .boolean
	}
	return match kw {
		'let' { .kw_let }
		'if' { .kw_if }
		'else' { .kw_else }
		'for' { .kw_for }
		'break' { .kw_break }
		'fn' { .kw_fn }
		'return' { .kw_return }
		else { .unknown }
	}
}
// is_delimiter reports whether byte `c` terminates the token being scanned.
//
// Delimiters are the punctuation/operator characters plus whitespace. Tab and
// carriage return are included alongside space and newline so that any
// whitespace separates tokens instead of being glued into the neighbouring
// token text.
//
// `is_inside_number` is true while a numeric literal is being scanned; in that
// state a `.` is part of the literal (e.g. `5.5`) and is therefore not a
// delimiter.
fn is_delimiter(c u8, is_inside_number bool) bool {
	if c == `.` && is_inside_number {
		return false
	}
	return " \t\r\n+-*/.,;:%<>()[]{}=".contains(c.ascii_str())
}
fn is_real(str string) bool {
@@ -99,7 +126,7 @@ fn is_real(str string) bool {
fn is_keyword(str string) bool {
return [
"void", "int", "real", "bool", "if", "else", "for", "break", "fn", "return"
"void", "int", "real", "bool", "if", "else", "for", "break", "fn", "return", "let", "true", "false"
].contains(str)
}
@@ -118,35 +145,37 @@ fn lex(input string) ?[]Token {
mut right := 0
mut line := 1
mut tokens := []Token{}
mut is_inside_number := false
for (right < input.len && left <= right) {
if input[right] == `\n` {
line++
}
if !is_delimiter(input[right]) {
if !is_delimiter(input[right], is_inside_number) {
is_inside_number = input[left].str().is_int()
right++
}
if right >= input.len {
break
}
if is_delimiter(input[right]) && left == right {
if input[right] != ` ` {
if is_delimiter(input[right], is_inside_number) && left == right {
if !input[right].is_space() {
tokens << Token{toktype_from_delimiter(input[right].ascii_str()), input[right].ascii_str()}
}
right++
left = right
}
else if (is_delimiter(input[right]) && left != right) || (right == input.len && left != right) {
else if (is_delimiter(input[right], is_inside_number) && left != right) || (right == input.len && left != right) {
subs := input.substr(left, right)
if is_keyword(subs) {
tokens << Token{TokenType.keyword, subs}
tokens << Token{toktype_from_kw(subs), subs}
} else if subs.is_int() {
tokens << Token{TokenType.integer, subs}
} else if is_real(subs) {
tokens << Token{TokenType.real, subs}
} else if subs.is_identifier() {
tokens << Token{TokenType.identifier, subs}
} else if !subs.is_identifier() && !is_delimiter(input[right-1]) {
} else if !subs.is_identifier() && !is_delimiter(input[right-1], is_inside_number) {
eprintln(term.red("ERROR: found invalid token " + subs + " at line " + line.str()))
return none
}

7
main.v
View File

@@ -6,5 +6,10 @@ fn main() {
content := os.read_file("test.one") or { return }
println("---------\n" + content + "---------")
tokens := lex(content) or { return }
print_toks(tokens)
mut parser := Parser{
tokens: tokens
pos: 0
}
statements := parser.parse_program()
println(statements)
}

415
parser.v Normal file
View File

@@ -0,0 +1,415 @@
module main
import term
// ------------------------------------------- Symbol Table
// SymbolInfo is the sum of the two kinds of symbol-table entry:
// a variable entry or a function entry.
type SymbolInfo = VarSymbolInfo | FuncSymbolInfo
// VarSymbolInfo is the entry stored for a declared variable.
struct VarSymbolInfo {
// type is the variable's type name as passed to define_var.
type string
}
// FuncSymbolInfo is the entry stored for a declared function.
struct FuncSymbolInfo {
// type is the type name passed to define_func (parse_func_decl passes the
// declared return type).
type string
}
// SymbolTable tracks the names known to the parser.
struct SymbolTable {
mut:
// variable_scopes is a stack of scopes; the last element is the innermost
// scope. parse_program and parse_block push a map on entry and remove it on exit.
variable_scopes []map[string]VarSymbolInfo
// functions maps a function name to its entry; there is a single function
// namespace, not one per scope.
functions map[string]FuncSymbolInfo
}
// define_var registers variable `name` with type name `typ` in the innermost
// scope. It aborts with a parse error when no scope is open or when the name
// is already present in that innermost scope (outer scopes may be shadowed).
fn (mut s SymbolTable) define_var(name string, typ string) {
	$if debug {
		dump(s.variable_scopes.len)
	}
	depth := s.variable_scopes.len
	if depth == 0 {
		parse_error('No scope available')
	}
	innermost := depth - 1
	if name in s.variable_scopes[innermost] {
		parse_error('Variable ${name} already defined in this scope')
	}
	s.variable_scopes[innermost][name] = VarSymbolInfo{
		type: typ
	}
}
// lookup_var resolves `name` by searching the scope stack from the innermost
// scope outwards and returns the first entry found, or `none` when no open
// scope declares the name (including when no scope is open at all).
fn (mut s SymbolTable) lookup_var(name string) ?VarSymbolInfo {
	$if debug {
		dump(s.variable_scopes.len)
	}
	// Walk the stack backwards by index: last element is the innermost scope.
	for depth := s.variable_scopes.len - 1; depth >= 0; depth-- {
		if name in s.variable_scopes[depth] {
			return s.variable_scopes[depth][name]
		}
	}
	return none
}
// define_func records function `name` with type name `typ`, replacing any
// entry previously stored under the same name.
fn (mut s SymbolTable) define_func(name string, typ string) {
	entry := FuncSymbolInfo{
		type: typ
	}
	s.functions[name] = entry
}
// lookup_func returns the entry recorded for function `name`, or `none` when
// no function with that name has been defined.
fn (mut s SymbolTable) lookup_func(name string) ?FuncSymbolInfo {
	entry := s.functions[name] or { return none }
	return entry
}
// is_in_global_scope reports whether exactly one scope is open, i.e. only the
// scope pushed by parse_program and no block scope on top of it.
fn (mut s SymbolTable) is_in_global_scope() bool {
	// Trace output is limited to debug builds, like the other diagnostics in
	// this file, so normal runs do not print the scope count.
	$if debug {
		println("scope count: ${s.variable_scopes.len}")
	}
	return s.variable_scopes.len == 1
}
// ------------------------------------------- Expressions
// Expr is the sum type of every expression node the parser can produce.
type Expr = VoidExpr | BinaryExpr | IntegerLiteral | RealLiteral | BoolLiteral | Variable | TypeExpr | Function
// VoidExpr is the "no value" expression; parse_return_stmt uses it for a bare `return;`.
struct VoidExpr {}
// BinaryExpr is an operator applied to two operand expressions.
struct BinaryExpr {
left Expr
// op is the operator spelled as text: '+', '-', '*', '/' or '='.
op string
right Expr
}
// IntegerLiteral holds the value of an `.integer` token.
struct IntegerLiteral {
val i32
}
// RealLiteral holds the value of a `.real` token.
struct RealLiteral {
val f32
}
// BoolLiteral holds the value of a `.boolean` token (true when the text is 'true').
struct BoolLiteral {
val bool
}
// Variable is a reference to a variable by name (the text of an `.identifier` token).
struct Variable {
name string
}
// TypeExpr is a type name appearing in expression position (the text of a `.type` token).
struct TypeExpr {
name string
}
// Function is produced by parse_primary for a `.kw_fn` token; `name` is that token's text.
struct Function {
name string
}
// ------------------------------------------- Statements
// Stmt is the sum type of every statement node the parser can produce.
type Stmt = VarDecl | ExprStmt | ReturnStmt | Block | FuncDecl
// VarDecl is a `let` declaration: variable name, initializer and declared type name.
struct VarDecl {
name string
value Expr
type string
}
// FuncDecl is a function declaration: name, declared return type name and body.
struct FuncDecl {
name string
ret_type string
block Block
}
// ExprStmt is an expression used as a statement.
struct ExprStmt {
expr Expr
}
// ReturnStmt is a `return`; `value` is a VoidExpr when nothing is returned.
struct ReturnStmt {
value Expr
}
// Block is a delimited sequence of statements.
struct Block {
stmts []Stmt
}
// ------------------------------------------- Parser
// Parser turns the token list produced by the lexer into a list of statements.
struct Parser {
// tokens is the input token list; it is never modified by the parser.
tokens []Token
mut:
// symbols holds the variable scopes and function table built while parsing.
symbols SymbolTable
// pos is the index into `tokens` of the next token to be read.
pos int
// statements accumulates the top-level statements parsed by parse_program.
statements []Stmt
}
// peek returns the token at the current position without consuming it.
// When the position is at or past the end of the token list it returns a
// synthetic `.eof` token instead of indexing out of range, so callers that
// loop until `.eof` terminate even if the list has no trailing eof token.
fn (mut p Parser) peek() Token {
	if p.pos >= p.tokens.len {
		return Token{TokenType.eof, ''}
	}
	return p.tokens[p.pos]
}
// next returns the token at the current position and advances past it.
// When the position is at or past the end of the token list it returns a
// synthetic `.eof` token and leaves the position unchanged, instead of
// indexing out of range.
fn (mut p Parser) next() Token {
	if p.pos >= p.tokens.len {
		return Token{TokenType.eof, ''}
	}
	token := p.tokens[p.pos]
	p.pos++
	return token
}
// expect consumes the current token if it has the given type; otherwise it
// aborts with a parse error naming the expected and the actual token type.
fn (mut p Parser) expect(type TokenType) {
	actual := p.peek().type
	if actual != type {
		wanted_name := str_from_toktype(type)
		actual_name := str_from_toktype(actual)
		parse_error('Expected ${wanted_name}, got ${actual_name}')
	}
	p.next()
}
// ------------------------------------------- Debug
// dump_token prints the token at the current position (the one peek would
// return) when compiled under `$if debug`; otherwise it does nothing.
fn (mut p Parser) dump_token() {
$if debug {
dump(p.peek())
}
}
// dump_stmt prints the most recently stored top-level statement when compiled
// under `$if debug`; it prints nothing while `p.statements` is still empty.
fn (mut p Parser) dump_stmt() {
$if debug {
if p.statements.len > 0 {
dump(p.statements[p.statements.len-1])
}
}
}
// parse_error reports `str` on stderr in red, prefixed with "Parse Error: ",
// and then panics. It never returns.
@[noreturn]
fn parse_error(str string) {
	message := term.red("Parse Error: " + str)
	eprintln(message)
	panic("")
}
// ------------------------------------------- Expressions
// parse_primary consumes one token and turns it into a leaf expression:
// integer, real and boolean literals, identifiers, type names and the `fn`
// keyword are accepted; any other token aborts with a parse error.
fn (mut p Parser) parse_primary() Expr {
	tok := p.next()
	// In debug builds this shows the token that follows the one just consumed.
	p.dump_token()
	text := tok.text
	return match tok.type {
		.integer { IntegerLiteral{text.int()} }
		.real { RealLiteral{text.f32()} }
		.boolean { BoolLiteral{text == 'true'} }
		.identifier { Variable{text} }
		.type { TypeExpr{text} }
		.kw_fn { Function{text} }
		else { parse_error("Unexpected Token") }
	}
}
// binary_precedence returns the binding strength of a binary operator token
// (higher binds tighter), or 0 when the token is not a binary operator.
fn binary_precedence(kind TokenType) int {
	return match kind {
		.equals { 1 }
		.plus, .minus { 2 }
		.star, .slash { 3 }
		else { 0 }
	}
}

// binary_op_text returns the operator spelling stored in BinaryExpr.op for a
// binary operator token, or '' when the token is not a binary operator.
fn binary_op_text(kind TokenType) string {
	return match kind {
		.plus { '+' }
		.minus { '-' }
		.star { '*' }
		.slash { '/' }
		.equals { '=' }
		else { '' }
	}
}

// parse_expr parses a full expression starting at the current token.
// `*` and `/` bind tighter than `+` and `-`, which bind tighter than `=`;
// the arithmetic operators group to the left and `=` groups to the right.
fn (mut p Parser) parse_expr() Expr {
	return p.parse_expr_above(1)
}

// parse_expr_above parses a primary followed by every binary operation whose
// operator precedence is at least `min_prec` (which must be >= 1, so that
// non-operator tokens, precedence 0, always end the expression).
fn (mut p Parser) parse_expr_above(min_prec int) Expr {
	mut left := p.parse_primary()
	for {
		kind := p.peek().type
		if binary_precedence(kind) < min_prec {
			break
		}
		left = Expr(p.parse_binary(left, binary_op_text(kind)))
	}
	return left
}

// parse_binary consumes the operator token at the current position, parses
// its right operand and returns the operation on `left` and that operand.
// `op` is the operator spelling to store in the node.
fn (mut p Parser) parse_binary(left Expr, op string) BinaryExpr {
	op_tok := p.next()
	prec := binary_precedence(op_tok.type)
	// Left-associative operators must not absorb a following operator of the
	// same precedence into their right operand; `=` (right-associative) may.
	next_min := if op_tok.type == .equals { prec } else { prec + 1 }
	right := p.parse_expr_above(next_min)
	return BinaryExpr{left, op, right}
}
// get_expr_type returns the type name of `expr`.
//
// Literals map to 'int', 'real' and 'bool'; VoidExpr maps to 'void'. A binary
// expression has the type of its operands, which must agree. A variable has
// the type recorded for it in the symbol table.
//
// Aborts with a parse error on an operand type mismatch, on an undefined
// variable, and on expression kinds that have no value type (TypeExpr,
// Function) rather than returning an error sentence as if it were a type name.
fn (mut p Parser) get_expr_type(expr Expr) string {
	return match expr {
		IntegerLiteral {
			'int'
		}
		RealLiteral {
			'real'
		}
		BoolLiteral {
			'bool'
		}
		VoidExpr {
			'void'
		}
		BinaryExpr {
			left_t := p.get_expr_type(expr.left)
			right_t := p.get_expr_type(expr.right)
			if left_t != right_t {
				parse_error('Type mismatch in expression: ${left_t} and ${right_t}')
			}
			left_t
		}
		Variable {
			p.dump_stmt()
			info := p.symbols.lookup_var(expr.name) or {
				parse_error("Undefined variable ${expr.name}")
			}
			info.type
		}
		else {
			parse_error("Tried getting type of unexpected Expr")
		}
	}
}
// ------------------------------------------- Statements
// get_return_stmts_recursive collects, in source order, every return
// statement found directly in `block` or in any block nested inside it.
// Other statement kinds are not searched.
fn (mut p Parser) get_return_stmts_recursive(block Block) []ReturnStmt {
	mut found := []ReturnStmt{}
	for stmt in block.stmts {
		match stmt {
			ReturnStmt {
				found << stmt
			}
			Block {
				// Append everything collected from the nested block.
				found << p.get_return_stmts_recursive(stmt)
			}
			else {}
		}
	}
	return found
}
// parse_statement parses one statement, choosing the statement kind from the
// current token: `let`, `return`, `fn` or a block opener; anything else is
// parsed as an expression statement.
fn (mut p Parser) parse_statement() Stmt {
	kind := p.peek().type
	if kind == .kw_let {
		return p.parse_var_decl()
	}
	if kind == .kw_return {
		return p.parse_return_stmt()
	}
	if kind == .kw_fn {
		return p.parse_func_decl()
	}
	if kind == .lbracket {
		return p.parse_block()
	}
	return p.parse_expr_stmt()
}
// parse_var_decl parses `let <name> <type> = <expr>;`, checks that the
// initializer's type equals the declared type, and registers the variable in
// the innermost scope.
//
// Aborts with a parse error when the name or type token is missing, the
// declared type is void, the types disagree, or the name is already defined
// in the innermost scope.
fn (mut p Parser) parse_var_decl() VarDecl {
	p.expect(.kw_let)

	name_tok := p.next()
	if name_tok.type != .identifier {
		parse_error("Expected variable name after let")
	}

	type_tok := p.next()
	if type_tok.type != .type {
		parse_error("Expected variable type after name")
	}
	declared := type_tok.text

	p.expect(.equals)
	initializer := p.parse_expr()

	if declared == 'void' {
		parse_error("Cannot declare a variable of type void")
	}
	if p.get_expr_type(initializer) != declared {
		parse_error("Mismatch between declared type (${declared}) and actual type (${p.get_expr_type(initializer)})")
	}
	p.expect(.semicolon)

	// The variable becomes visible only after its initializer was parsed, so
	// it cannot refer to itself.
	p.symbols.define_var(name_tok.text, declared)
	return VarDecl{
		name:  name_tok.text
		value: initializer
		type:  declared
	}
}
// parse_func_decl parses `fn <name>() <type> <block>`, checks every return
// statement in the body (including nested blocks) against the declared return
// type, and records the function in the symbol table.
//
// Aborts with a parse error when used outside the global scope, when the name
// or return type token is missing, or when a return value's type differs from
// the declared return type. Parameters are not supported: the parentheses
// must be empty.
fn (mut p Parser) parse_func_decl() FuncDecl {
	if !p.symbols.is_in_global_scope() {
		parse_error("Tried to define a function in a non-global scope")
	}
	p.expect(.kw_fn)
	name_tok := p.next()
	if name_tok.type != .identifier {
		parse_error("Expected function name after fn")
	}
	p.expect(.lparen)
	p.expect(.rparen)
	type_tok := p.next()
	if type_tok.type != .type {
		parse_error("Expected return type after parameter list")
	}
	block := p.parse_block()
	// NOTE(review): parse_block has already removed the body's scope when it
	// returns, so get_expr_type below looks up returned variables without that
	// scope; returning a local variable is therefore reported as undefined.
	return_stmts := p.get_return_stmts_recursive(block)
	for return_stmt in return_stmts {
		if p.get_expr_type(return_stmt.value) != type_tok.text {
			parse_error("Mismatch between declared return type (${type_tok.text}) \
and actual return type (${p.get_expr_type(return_stmt.value)})")
		}
	}
	// The function is recorded only after its body was parsed.
	p.symbols.define_func(name_tok.text, type_tok.text)
	return FuncDecl{
		name:     name_tok.text
		ret_type: type_tok.text
		block:    block
	}
}
// parse_return_stmt parses `return;` or `return <expr>;`.
//
// A bare `return;` yields a ReturnStmt whose value is a VoidExpr. Otherwise
// the value is parsed with the regular expression parser, so compound values
// such as `return y * 2;` are accepted in addition to single literals and
// identifiers.
//
// Aborts with a parse error when the value is not an expression, when it is a
// bare type name or `fn` keyword, or when the terminating semicolon is missing.
fn (mut p Parser) parse_return_stmt() ReturnStmt {
	p.expect(.kw_return)
	p.dump_token()
	if p.peek().type == .semicolon {
		// Nothing is returned; consume the semicolon.
		p.next()
		return ReturnStmt{
			value: Expr(VoidExpr{})
		}
	}
	value := p.parse_expr()
	// parse_expr accepts type names and `fn` as primaries; neither is a value
	// that can be returned.
	if value is TypeExpr || value is Function {
		parse_error("Unexpected Token")
	}
	p.expect(.semicolon)
	return ReturnStmt{
		value: value
	}
}
// parse_expr_stmt parses an expression followed by a semicolon and wraps it
// as a statement.
fn (mut p Parser) parse_expr_stmt() ExprStmt {
	parsed := p.parse_expr()
	p.expect(.semicolon)
	return ExprStmt{parsed}
}
// parse_block parses a delimited list of statements inside its own variable
// scope, which is opened before the first statement and removed again after
// the closing delimiter.
//
// Aborts with a parse error when the block directly contains more than one
// return statement, or a return statement that is not its last statement.
fn (mut p Parser) parse_block() Block {
	p.expect(.lbracket)
	mut body := []Stmt{}
	p.symbols.variable_scopes << map[string]VarSymbolInfo{}
	$if debug {
		println("entering scope")
	}
	for {
		kind := p.peek().type
		if kind == .rbracket || kind == .eof {
			break
		}
		body << p.parse_statement()
	}
	p.expect(.rbracket)

	// Count only the returns written directly in this block.
	mut return_count := 0
	for stmt in body {
		if stmt is ReturnStmt {
			return_count++
		}
	}
	if return_count > 1 {
		parse_error("Unexpected use of return. Unreachable code")
	}
	if return_count == 1 {
		// The single return must be the final statement of the block.
		last_stmt := body[body.len - 1]
		if last_stmt !is ReturnStmt {
			parse_error("Unexpected use of return. Unreachable code")
		}
	}

	p.symbols.variable_scopes.delete_last()
	$if debug {
		println("exiting scope")
	}
	return Block{
		stmts: body
	}
}
// parse_program parses statements until the `.eof` token, inside a global
// variable scope that is opened first and removed at the end, and returns all
// statements accumulated in `p.statements`.
fn (mut p Parser) parse_program() []Stmt {
	p.symbols.variable_scopes << map[string]VarSymbolInfo{}
	for {
		if p.peek().type == .eof {
			break
		}
		p.statements << p.parse_statement()
	}
	p.symbols.variable_scopes.delete_last()
	return p.statements
}

View File

@@ -1,3 +1,9 @@
fn bool singledigit(int x) {
return x < 10;
fn foo() int {
let y int = 2;
let z int = y*2;
return y;
}
fn bar() float {
return 5.5;
}