tanta roba, wip scope
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,6 +1,6 @@
|
||||
# Binaries for programs and plugins
|
||||
main
|
||||
vlexer
|
||||
onev
|
||||
*.exe
|
||||
*.exe~
|
||||
*.so
|
||||
|
||||
59
lexer.v
59
lexer.v
@@ -3,9 +3,17 @@ module main
|
||||
import term
|
||||
|
||||
enum TokenType as u8 {
|
||||
kw_let
|
||||
type
|
||||
kw_if
|
||||
kw_else
|
||||
kw_for
|
||||
kw_break
|
||||
kw_fn
|
||||
kw_return
|
||||
integer
|
||||
real
|
||||
keyword
|
||||
boolean
|
||||
identifier
|
||||
plus
|
||||
minus
|
||||
@@ -24,7 +32,6 @@ enum TokenType as u8 {
|
||||
comma
|
||||
semicolon
|
||||
colon
|
||||
newline
|
||||
eof
|
||||
unknown
|
||||
}
|
||||
@@ -34,12 +41,19 @@ struct Token {
|
||||
text string
|
||||
}
|
||||
|
||||
|
||||
fn str_from_toktype(type TokenType) string {
|
||||
return match type {
|
||||
.integer {'integer'}
|
||||
.real {'real'}
|
||||
.keyword {'keyword'}
|
||||
.boolean {'boolean'}
|
||||
.kw_let {'let'}
|
||||
.type {'type'}
|
||||
.kw_if {'if'}
|
||||
.kw_else {'else'}
|
||||
.kw_for {'for'}
|
||||
.kw_break {'break'}
|
||||
.kw_fn {'break'}
|
||||
.kw_return {'return'}
|
||||
.identifier {'identifier'}
|
||||
.eof {'EOF'}
|
||||
.unknown {'unknown'}
|
||||
@@ -60,7 +74,6 @@ fn str_from_toktype(type TokenType) string {
|
||||
.comma {'comma'}
|
||||
.semicolon {'semicolon'}
|
||||
.colon {'colon'}
|
||||
.newline {'newline'}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,13 +96,27 @@ fn toktype_from_delimiter(delimiter string) TokenType {
|
||||
',' {.comma}
|
||||
';' {.semicolon}
|
||||
':' {.colon}
|
||||
'\n' {.newline}
|
||||
else {.unknown}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_delimiter(c u8) bool {
|
||||
return " +-*/.,;:%<>()[]{}=\n".contains(c.ascii_str())
|
||||
// toktype_from_kw maps a reserved word to its token type.
// The built-in type names all share `.type`, the literals `true` and `false`
// share `.boolean`, and any text that is not a reserved word yields `.unknown`.
fn toktype_from_kw(kw string) TokenType {
	// Words that share a token type are handled up front.
	if kw in ['void', 'real', 'bool', 'int'] {
		return .type
	}
	if kw in ['true', 'false'] {
		return .boolean
	}
	// Every remaining keyword has a dedicated token type.
	return match kw {
		'let' { .kw_let }
		'if' { .kw_if }
		'else' { .kw_else }
		'for' { .kw_for }
		'break' { .kw_break }
		'fn' { .kw_fn }
		'return' { .kw_return }
		else { .unknown }
	}
}
|
||||
|
||||
// is_delimiter reports whether the byte `c` ends the current lexeme.
// A `.` is not treated as a delimiter while `is_inside_number` is true, so
// that the dot of a real literal stays part of the number.
fn is_delimiter(c u8, is_inside_number bool) bool {
	ch := c.ascii_str()
	// Guard: the decimal point of a number in progress never splits it.
	if ch == '.' && is_inside_number {
		return false
	}
	return " +-*/.,;:%<>()[]{}=\n".contains(ch)
}
|
||||
|
||||
fn is_real(str string) bool {
|
||||
@@ -99,7 +126,7 @@ fn is_real(str string) bool {
|
||||
|
||||
// is_keyword reports whether `str` is a reserved word of the language.
// It delegates to toktype_from_kw so the set of reserved words is declared in
// exactly one place; that function returns `.unknown` for anything that is not
// a keyword, a built-in type name or a boolean literal.
fn is_keyword(str string) bool {
	return toktype_from_kw(str) != .unknown
}
|
||||
|
||||
@@ -118,35 +145,37 @@ fn lex(input string) ?[]Token {
|
||||
mut right := 0
|
||||
mut line := 1
|
||||
mut tokens := []Token{}
|
||||
mut is_inside_number := false
|
||||
|
||||
for (right < input.len && left <= right) {
|
||||
if input[right] == `\n` {
|
||||
line++
|
||||
}
|
||||
if !is_delimiter(input[right]) {
|
||||
if !is_delimiter(input[right], is_inside_number) {
|
||||
is_inside_number = input[left].str().is_int()
|
||||
right++
|
||||
}
|
||||
if right >= input.len {
|
||||
break
|
||||
}
|
||||
if is_delimiter(input[right]) && left == right {
|
||||
if input[right] != ` ` {
|
||||
if is_delimiter(input[right], is_inside_number) && left == right {
|
||||
if !input[right].is_space() {
|
||||
tokens << Token{toktype_from_delimiter(input[right].ascii_str()), input[right].ascii_str()}
|
||||
}
|
||||
right++
|
||||
left = right
|
||||
}
|
||||
else if (is_delimiter(input[right]) && left != right) || (right == input.len && left != right) {
|
||||
else if (is_delimiter(input[right], is_inside_number) && left != right) || (right == input.len && left != right) {
|
||||
subs := input.substr(left, right)
|
||||
if is_keyword(subs) {
|
||||
tokens << Token{TokenType.keyword, subs}
|
||||
tokens << Token{toktype_from_kw(subs), subs}
|
||||
} else if subs.is_int() {
|
||||
tokens << Token{TokenType.integer, subs}
|
||||
} else if is_real(subs) {
|
||||
tokens << Token{TokenType.real, subs}
|
||||
} else if subs.is_identifier() {
|
||||
tokens << Token{TokenType.identifier, subs}
|
||||
} else if !subs.is_identifier() && !is_delimiter(input[right-1]) {
|
||||
} else if !subs.is_identifier() && !is_delimiter(input[right-1], is_inside_number) {
|
||||
eprintln(term.red("ERROR: found invalid token " + subs + " at line " + line.str()))
|
||||
return none
|
||||
}
|
||||
|
||||
7
main.v
7
main.v
@@ -6,5 +6,10 @@ fn main() {
|
||||
content := os.read_file("test.one") or { return }
|
||||
println("---------\n" + content + "---------")
|
||||
tokens := lex(content) or { return }
|
||||
print_toks(tokens)
|
||||
mut parser := Parser{
|
||||
tokens: tokens
|
||||
pos: 0
|
||||
}
|
||||
statements := parser.parse_program()
|
||||
println(statements)
|
||||
}
|
||||
|
||||
415
parser.v
Normal file
415
parser.v
Normal file
@@ -0,0 +1,415 @@
|
||||
module main
|
||||
|
||||
import term
|
||||
|
||||
// ------------------------------------------- Symbol Table
|
||||
|
||||
// SymbolInfo is the sum of every kind of entry the symbol table can hold.
type SymbolInfo = VarSymbolInfo | FuncSymbolInfo

// VarSymbolInfo describes one declared variable.
struct VarSymbolInfo {
	type string // type name as written in the declaration
}

// FuncSymbolInfo describes one declared function.
struct FuncSymbolInfo {
	type string // return type name as written in the declaration
}

// SymbolTable tracks the names known to the parser.
struct SymbolTable {
mut:
	// Stack of variable scopes; the last element is the innermost scope.
	variable_scopes []map[string]VarSymbolInfo
	// Functions, keyed by name (a single flat namespace).
	functions map[string]FuncSymbolInfo
}
|
||||
|
||||
// define_var registers variable `name` with type `typ` in the innermost scope.
// It aborts with a parse error when no scope is open or when the name is
// already present in that innermost scope.
fn (mut s SymbolTable) define_var(name string, typ string) {
	$if debug {
		dump(s.variable_scopes.len)
	}

	depth := s.variable_scopes.len
	if depth == 0 {
		parse_error('No scope available')
	}

	// Only the innermost scope is checked, so outer names may be shadowed.
	innermost := depth - 1
	if name in s.variable_scopes[innermost] {
		parse_error('Variable ${name} already defined in this scope')
	}

	s.variable_scopes[innermost][name] = VarSymbolInfo{
		type: typ
	}
}
|
||||
|
||||
// lookup_var searches the scope stack from the innermost scope outwards and
// returns the first entry found for `name`, or `none` when the name is not
// defined in any open scope (including when no scope is open at all).
fn (mut s SymbolTable) lookup_var(name string) ?VarSymbolInfo {
	$if debug {
		dump(s.variable_scopes.len)
	}
	// Walk the stack backwards; the loop body never runs for an empty stack.
	for i := s.variable_scopes.len - 1; i >= 0; i-- {
		if name in s.variable_scopes[i] {
			return s.variable_scopes[i][name]
		}
	}
	return none
}
|
||||
|
||||
// define_func registers function `name` with return type `typ`.
// Like define_var, it aborts with a parse error when the name is already
// taken instead of silently replacing the earlier definition.
fn (mut s SymbolTable) define_func(name string, typ string) {
	if name in s.functions {
		parse_error('Function ${name} already defined')
	}
	s.functions[name] = FuncSymbolInfo{
		type: typ
	}
}
|
||||
|
||||
// lookup_func returns the entry registered for function `name`, or `none`
// when no function with that name has been defined.
fn (mut s SymbolTable) lookup_func(name string) ?FuncSymbolInfo {
	info := s.functions[name] or { return none }
	return info
}
|
||||
|
||||
// is_in_global_scope reports whether exactly one variable scope is open,
// i.e. only the scope pushed by parse_program.
fn (mut s SymbolTable) is_in_global_scope() bool {
	// Diagnostic output is limited to debug builds, like the other traces
	// in this file, so normal runs do not print the scope count.
	$if debug {
		println("scope count: ${s.variable_scopes.len}")
	}
	return s.variable_scopes.len == 1
}
|
||||
|
||||
// ------------------------------------------- Expressions
|
||||
|
||||
// Expr is the sum of all expression node kinds the parser can build.
type Expr = VoidExpr | BinaryExpr | IntegerLiteral | RealLiteral | BoolLiteral | Variable | TypeExpr | Function

// VoidExpr stands for "no value"; it is produced for a bare `return;`.
struct VoidExpr {}

// BinaryExpr is an operator applied to two operand expressions.
struct BinaryExpr {
	left  Expr
	op    string // operator text: '+', '-', '*', '/' or '='
	right Expr
}

// IntegerLiteral is an integer constant.
struct IntegerLiteral {
	val i32
}

// RealLiteral is a real (floating point) constant.
struct RealLiteral {
	val f32
}

// BoolLiteral is a `true` / `false` constant.
struct BoolLiteral {
	val bool
}

// Variable is a reference to a variable by name.
struct Variable {
	name string
}

// TypeExpr is a type name appearing in expression position.
struct TypeExpr {
	name string
}

// Function is built by parse_primary from a `fn` keyword token; `name` holds
// that token's text.
struct Function {
	name string
}
|
||||
|
||||
// ------------------------------------------- Statements
|
||||
|
||||
// Stmt is the sum of all statement node kinds the parser can build.
type Stmt = VarDecl | ExprStmt | ReturnStmt | Block | FuncDecl

// VarDecl is a `let` declaration with its initialiser.
struct VarDecl {
	name  string
	value Expr
	type  string // declared type name
}

// FuncDecl is a function declaration together with its body.
struct FuncDecl {
	name     string
	ret_type string // declared return type name
	block    Block
}

// ExprStmt is an expression used as a statement.
struct ExprStmt {
	expr Expr
}

// ReturnStmt is a `return`; `value` is a VoidExpr when nothing is returned.
struct ReturnStmt {
	value Expr
}

// Block is a sequence of statements parsed inside its own variable scope.
struct Block {
	stmts []Stmt
}
|
||||
|
||||
// ------------------------------------------- Parser
|
||||
|
||||
// Parser turns a token list into a list of statements.
struct Parser {
	tokens []Token // input produced by the lexer; never modified
mut:
	symbols    SymbolTable // variables and functions seen so far
	pos        int         // index into `tokens` of the next token to read
	statements []Stmt      // top-level statements collected by parse_program
}
|
||||
|
||||
// peek returns the token at the current position without consuming it.
// Once the position is past the last token it returns a synthetic `.eof`
// token with empty text instead of indexing out of bounds, so callers that
// loop until `.eof` terminate even when the input is truncated.
fn (mut p Parser) peek() Token {
	if p.pos >= p.tokens.len {
		return Token{TokenType.eof, ''}
	}
	return p.tokens[p.pos]
}
|
||||
|
||||
// next returns the token at the current position and advances past it.
// Once the position is past the last token it returns a synthetic `.eof`
// token with empty text and leaves the position unchanged, instead of
// indexing out of bounds.
fn (mut p Parser) next() Token {
	if p.pos >= p.tokens.len {
		return Token{TokenType.eof, ''}
	}
	token := p.tokens[p.pos]
	p.pos++
	return token
}
|
||||
|
||||
// expect consumes the current token when its type equals `type`; otherwise it
// aborts with a parse error naming the expected and the actual token type.
fn (mut p Parser) expect(type TokenType) {
	found := p.peek().type
	if found != type {
		parse_error('Expected ${str_from_toktype(type)}, got ${str_from_toktype(found)}')
	}
	p.next()
}
|
||||
|
||||
// ------------------------------------------- Debug
|
||||
|
||||
// dump_token prints the token at the current position.
// It does nothing unless the program is compiled with the `debug` flag.
fn (mut p Parser) dump_token() {
	$if debug {
		dump(p.peek())
	}
}
|
||||
|
||||
// dump_stmt prints the most recently collected top-level statement, if any.
// It does nothing unless the program is compiled with the `debug` flag.
fn (mut p Parser) dump_stmt() {
	$if debug {
		// Nothing has been collected yet: there is nothing to show.
		if p.statements.len == 0 {
			return
		}
		dump(p.statements[p.statements.len-1])
	}
}
|
||||
|
||||
// parse_error prints `str` in red on stderr, prefixed with "Parse Error: ",
// and then panics with an empty message. It never returns.
@[noreturn]
fn parse_error(str string) {
	message := "Parse Error: " + str
	eprintln(term.red(message))
	panic("")
}
|
||||
|
||||
// ------------------------------------------- Expression parsing
|
||||
|
||||
// parse_primary consumes one token and converts it into a leaf expression.
// Integer, real and boolean literals, identifiers, type names and the `fn`
// keyword are accepted; any other token aborts with a parse error.
fn (mut p Parser) parse_primary() Expr {
	consumed := p.next()

	// NOTE(review): this runs after the token was consumed, so in debug
	// builds it shows the token that follows `consumed`, not `consumed`.
	p.dump_token()

	text := consumed.text
	return match consumed.type {
		.integer { IntegerLiteral{text.int()} }
		.real { RealLiteral{text.f32()} }
		.boolean { BoolLiteral{text == 'true'} }
		.identifier { Variable{text} }
		.type { TypeExpr{text} }
		.kw_fn { Function{text} }
		else { parse_error("Unexpected Token") }
	}
}
|
||||
|
||||
// parse_expr parses a primary expression and, when it is followed by one of
// the operators + - * / =, the rest of the binary expression.
// NOTE(review): parse_binary parses its right operand with parse_expr again,
// so chains nest to the right and all operators are treated alike
// (`a * b + c` becomes `a * (b + c)`) - confirm whether this is intended.
fn (mut p Parser) parse_expr() Expr {
	lhs := p.parse_primary()

	// Translate the upcoming token into operator text; '' means "no operator".
	op := match p.peek().type {
		.plus { '+' }
		.minus { '-' }
		.star { '*' }
		.slash { '/' }
		.equals { '=' }
		else { '' }
	}
	if op == '' {
		return lhs
	}
	return p.parse_binary(lhs, op)
}
|
||||
|
||||
// parse_binary builds a binary expression from an already parsed left operand.
// The current token (the operator, already identified by the caller as `op`)
// is consumed, then the right operand is parsed as a full expression.
fn (mut p Parser) parse_binary(left Expr, op string) BinaryExpr {
	// Skip the operator token itself.
	p.next()
	rhs := p.parse_expr()
	return BinaryExpr{
		left: left
		op: op
		right: rhs
	}
}
|
||||
|
||||
// get_expr_type returns the type name ('int', 'real', 'bool', 'void' or a
// variable's declared type) of `expr`.
// It aborts with a parse error when the two operands of a binary expression
// have different types, when a variable is undefined, or when the expression
// kind has no type (type names and `fn` tokens in expression position).
fn (mut p Parser) get_expr_type(expr Expr) string {
	return match expr {
		IntegerLiteral { 'int' }
		RealLiteral { 'real' }
		BoolLiteral { 'bool' }
		VoidExpr { 'void' }
		BinaryExpr {
			// Both operands must agree; the shared type is the result type.
			left_t := p.get_expr_type(expr.left)
			right_t := p.get_expr_type(expr.right)
			if left_t != right_t {
				parse_error('Type mismatch in expression: ${left_t} and ${right_t}')
			}
			left_t
		}
		Variable {
			p.dump_stmt()
			info := p.symbols.lookup_var(expr.name) or {
				parse_error("Undefined variable ${expr.name}")
			}
			info.type
		}
		else {
			// Report the problem directly instead of returning the message
			// as though it were a type name.
			parse_error('Tried getting type of unexpected Expr')
		}
	}
}
|
||||
|
||||
// ------------------------------------------- Statement parsing
|
||||
|
||||
// get_return_stmts_recursive collects, in source order, every return
// statement found directly in `block` or in any block nested inside it.
fn (mut p Parser) get_return_stmts_recursive(block Block) []ReturnStmt {
	mut found := []ReturnStmt{}
	for stmt in block.stmts {
		match stmt {
			ReturnStmt {
				found << stmt
			}
			Block {
				// Descend into nested blocks and append what they contain.
				found << p.get_return_stmts_recursive(stmt)
			}
			else {}
		}
	}
	return found
}
|
||||
|
||||
// parse_statement looks at the current token to decide which kind of
// statement follows and delegates to the matching parse function. Anything
// that does not start with `let`, `return`, `fn` or the block-opening token
// is parsed as an expression statement.
fn (mut p Parser) parse_statement() Stmt {
	kind := p.peek().type
	if kind == .kw_let {
		return p.parse_var_decl()
	}
	if kind == .kw_return {
		return p.parse_return_stmt()
	}
	if kind == .kw_fn {
		return p.parse_func_decl()
	}
	if kind == .lbracket {
		return p.parse_block()
	}
	return p.parse_expr_stmt()
}
|
||||
|
||||
// parse_var_decl parses `let <name> <type> = <expr>;`, checks that the
// declared type is not void and matches the initialiser's type, and records
// the variable in the innermost scope. Any violation aborts with a parse
// error.
fn (mut p Parser) parse_var_decl() VarDecl {
	p.expect(.kw_let)

	ident := p.next()
	if ident.type != .identifier {
		parse_error("Expected variable name after let")
	}

	declared := p.next()
	if declared.type != .type {
		parse_error("Expected variable type after name")
	}

	p.expect(.equals)
	initializer := p.parse_expr()

	// The type checks run only after the whole initialiser has been parsed.
	if declared.text == 'void' {
		parse_error("Cannot declare a variable of type void")
	}
	if p.get_expr_type(initializer) != declared.text {
		parse_error("Mismatch between declared type (${declared.text}) and actual type (${p.get_expr_type(initializer)})")
	}
	p.expect(.semicolon)

	// The name becomes visible only once the declaration is complete.
	p.symbols.define_var(ident.text, declared.text)

	return VarDecl{
		name: ident.text
		value: initializer
		type: declared.text
	}
}
|
||||
|
||||
// parse_func_decl parses `fn <name>() <type> <block>`.
// Functions may only be declared in the global scope. Every return statement
// in the body (including nested blocks) must have the declared return type.
// On success the function is recorded in the symbol table; any violation
// aborts with a parse error.
fn (mut p Parser) parse_func_decl() FuncDecl {
	if !p.symbols.is_in_global_scope() {
		parse_error("Tried to define a function in a non-global scope")
	}

	p.expect(.kw_fn)

	name_tok := p.next()
	if name_tok.type != .identifier {
		parse_error("Expected function name after fn")
	}

	// Parameters are not supported yet: the list must be empty.
	p.expect(.lparen)
	p.expect(.rparen)

	type_tok := p.next()
	if type_tok.type != .type {
		parse_error("Expected return type after parameter list")
	}

	block := p.parse_block()

	for return_stmt in p.get_return_stmts_recursive(block) {
		// Compute the type once and reuse it in the message.
		actual := p.get_expr_type(return_stmt.value)
		if actual != type_tok.text {
			parse_error("Mismatch between declared return type (${type_tok.text}) and actual return type (${actual})")
		}
	}

	// The function is registered only after its body was parsed and checked.
	p.symbols.define_func(name_tok.text, type_tok.text)

	return FuncDecl{
		name: name_tok.text
		ret_type: type_tok.text
		block: block
	}
}
|
||||
|
||||
// parse_return_stmt parses `return;` or `return <value>;` where <value> is a
// single integer, real or boolean literal or an identifier. A bare `return;`
// yields a VoidExpr value. Any other token after `return` aborts with a
// parse error.
// NOTE(review): only a single token is accepted as the value; compound
// expressions such as `return a + b;` are rejected - confirm this is intended.
fn (mut p Parser) parse_return_stmt() ReturnStmt {
	p.expect(.kw_return)

	token := p.peek()
	p.dump_token()

	// Start from "no value"; a value token below replaces it.
	mut value := Expr(VoidExpr{})
	match token.type {
		.integer { value = IntegerLiteral{token.text.int()} }
		.real { value = RealLiteral{token.text.f32()} }
		.boolean { value = BoolLiteral{token.text == 'true'} }
		.identifier { value = Variable{token.text} }
		.semicolon {}
		else { parse_error("Unexpected Token") }
	}

	// Consume the inspected token: the value, or the `;` of a bare return.
	p.next()
	if value !is VoidExpr {
		p.expect(.semicolon)
	}
	return ReturnStmt{
		value: value
	}
}
|
||||
|
||||
// parse_expr_stmt parses an expression followed by a semicolon.
fn (mut p Parser) parse_expr_stmt() ExprStmt {
	stmt := ExprStmt{
		expr: p.parse_expr()
	}
	p.expect(.semicolon)
	return stmt
}
|
||||
|
||||
// parse_block parses the statements between the block-opening and the
// block-closing token inside a fresh variable scope, which is dropped again
// before returning. A return statement is only allowed as the last statement
// of the block; otherwise a parse error is raised.
fn (mut p Parser) parse_block() Block {
	p.expect(.lbracket)

	mut body := []Stmt{}

	// Open the scope that belongs to this block.
	p.symbols.variable_scopes << map[string]VarSymbolInfo{}
	$if debug {
		println("entering scope")
	}

	for {
		upcoming := p.peek().type
		if upcoming == .rbracket || upcoming == .eof {
			break
		}
		body << p.parse_statement()
	}

	p.expect(.rbracket)

	// A return anywhere but in last position leaves unreachable code behind.
	last_index := body.len - 1
	for i, stmt in body {
		if stmt is ReturnStmt && i != last_index {
			parse_error("Unexpected use of return. Unreachable code")
		}
	}

	p.symbols.variable_scopes.delete_last()

	$if debug {
		println("exiting scope")
	}

	return Block{
		stmts: body
	}
}
|
||||
|
||||
// parse_program parses statements until the `.eof` token is reached and
// returns them. The global variable scope exists only for the duration of
// this call.
fn (mut p Parser) parse_program() []Stmt {
	// Global scope: pushed here, popped once the whole input is consumed.
	p.symbols.variable_scopes << map[string]VarSymbolInfo{}
	for {
		if p.peek().type == .eof {
			break
		}
		p.statements << p.parse_statement()
	}
	p.symbols.variable_scopes.delete_last()
	return p.statements
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user