tanta roba, wip scope

This commit is contained in:
uan
2026-02-03 23:07:33 +01:00
parent fbae7bc5d5
commit 969de6a59b
5 changed files with 474 additions and 19 deletions

59
lexer.v
View File

@@ -3,9 +3,17 @@ module main
import term
enum TokenType as u8 {
kw_let
type
kw_if
kw_else
kw_for
kw_break
kw_fn
kw_return
integer
real
keyword
boolean
identifier
plus
minus
@@ -24,7 +32,6 @@ enum TokenType as u8 {
comma
semicolon
colon
newline
eof
unknown
}
@@ -34,12 +41,19 @@ struct Token {
text string
}
// Human-readable name for a token type, used when printing tokens.
// NOTE(review): the parameter is named 'type', which shadows the V keyword —
// consider renaming (e.g. 'tt') for clarity.
fn str_from_toktype(type TokenType) string {
return match type {
.integer {'integer'}
.real {'real'}
.keyword {'keyword'}
.boolean {'boolean'}
.kw_let {'let'}
.type {'type'}
.kw_if {'if'}
.kw_else {'else'}
.kw_for {'for'}
.kw_break {'break'}
// FIXME: copy-paste bug — .kw_fn maps to 'break'; should be 'fn'
// (compare: toktype_from_kw maps 'fn' -> .kw_fn).
.kw_fn {'break'}
.kw_return {'return'}
.identifier {'identifier'}
.eof {'EOF'}
.unknown {'unknown'}
@@ -60,7 +74,6 @@ fn str_from_toktype(type TokenType) string {
.comma {'comma'}
.semicolon {'semicolon'}
.colon {'colon'}
// removed by this commit: the .newline token type is dropped from the lexer
.newline {'newline'}
}
}
@@ -83,13 +96,27 @@ fn toktype_from_delimiter(delimiter string) TokenType {
',' {.comma}
';' {.semicolon}
':' {.colon}
'\n' {.newline}
else {.unknown}
}
}
fn is_delimiter(c u8) bool {
return " +-*/.,;:%<>()[]{}=\n".contains(c.ascii_str())
// Resolve a keyword lexeme to its token type.
// The grouped families (type names, boolean literals) are checked first;
// every remaining keyword has a one-to-one mapping. Anything that is not
// a keyword falls through to .unknown.
fn toktype_from_kw(kw string) TokenType {
	if kw in ['void', 'real', 'bool', 'int'] {
		return .type
	}
	if kw in ['true', 'false'] {
		return .boolean
	}
	return match kw {
		'let' { TokenType.kw_let }
		'if' { TokenType.kw_if }
		'else' { TokenType.kw_else }
		'for' { TokenType.kw_for }
		'break' { TokenType.kw_break }
		'fn' { TokenType.kw_fn }
		'return' { TokenType.kw_return }
		else { TokenType.unknown }
	}
}
// Reports whether `c` ends the current token.
// While a number is being scanned (`is_inside_number`), a '.' is NOT a
// delimiter, so a real literal like "1.5" stays a single token.
fn is_delimiter(c u8, is_inside_number bool) bool {
	ch := c.ascii_str()
	if ch == '.' && is_inside_number {
		return false
	}
	return " +-*/.,;:%<>()[]{}=\n".contains(ch)
}
fn is_real(str string) bool {
@@ -99,7 +126,7 @@ fn is_real(str string) bool {
// True if `str` is a reserved word: type names, control-flow keywords,
// 'let', and the boolean literals.
fn is_keyword(str string) bool {
return [
// NOTE(review): diff artifact — the next line appears to be the pre-commit
// list, superseded by the one after it (which adds 'let', 'true', 'false').
"void", "int", "real", "bool", "if", "else", "for", "break", "fn", "return"
"void", "int", "real", "bool", "if", "else", "for", "break", "fn", "return", "let", "true", "false"
].contains(str)
}
@@ -118,35 +145,37 @@ fn lex(input string) ?[]Token {
mut right := 0
mut line := 1
mut tokens := []Token{}
mut is_inside_number := false
for (right < input.len && left <= right) {
if input[right] == `\n` {
line++
}
if !is_delimiter(input[right]) {
if !is_delimiter(input[right], is_inside_number) {
is_inside_number = input[left].str().is_int()
right++
}
if right >= input.len {
break
}
if is_delimiter(input[right]) && left == right {
if input[right] != ` ` {
if is_delimiter(input[right], is_inside_number) && left == right {
if !input[right].is_space() {
tokens << Token{toktype_from_delimiter(input[right].ascii_str()), input[right].ascii_str()}
}
right++
left = right
}
else if (is_delimiter(input[right]) && left != right) || (right == input.len && left != right) {
else if (is_delimiter(input[right], is_inside_number) && left != right) || (right == input.len && left != right) {
subs := input.substr(left, right)
if is_keyword(subs) {
tokens << Token{TokenType.keyword, subs}
tokens << Token{toktype_from_kw(subs), subs}
} else if subs.is_int() {
tokens << Token{TokenType.integer, subs}
} else if is_real(subs) {
tokens << Token{TokenType.real, subs}
} else if subs.is_identifier() {
tokens << Token{TokenType.identifier, subs}
} else if !subs.is_identifier() && !is_delimiter(input[right-1]) {
} else if !subs.is_identifier() && !is_delimiter(input[right-1], is_inside_number) {
eprintln(term.red("ERROR: found invalid token " + subs + " at line " + line.str()))
return none
}