208 lines
4.1 KiB
V
208 lines
4.1 KiB
V
module main
|
|
|
|
import term
|
|
|
|
// TokenType enumerates every category of token the lexer can produce.
// Members are stored as `u8`; their order defines their numeric values.
enum TokenType as u8 {
	// keywords (`type` covers the built-in type names: void, real, bool, int, string)
	kw_let
	kw_const
	type
	kw_if
	kw_else
	kw_elif
	kw_for
	kw_break
	kw_fn
	kw_return
	kw_print
	kw_class
	// literals and names
	integer
	real
	boolean
	string
	identifier
	// single-character operators
	plus
	minus
	star
	slash
	equals
	less
	greater
	// two-character operators
	eq_eq
	greater_eq
	less_eq
	not_eq
	plus_eq
	minus_eq
	star_eq
	slash_eq
	increment
	decrement
	// brackets: ( ) [ ] { }
	lparen
	rparen
	lsqparen
	rsqparen
	lbracket
	rbracket
	// punctuation
	dot
	comma
	semicolon
	colon
	// end-of-input marker and fallback for unrecognised text
	eof
	unknown
}
|
|
|
|
// Token is one lexical unit: its category plus the source text it covers.
// Field order matters: tokens are built with positional initialisation
// (`Token{kind, text}`).
struct Token {
	// category of the token
	type TokenType
	// text of the token; for string tokens this is the content between the quotes
	text string
}
|
|
|
|
// str_from_toktype returns the name of the enum member `type` as a string.
fn str_from_toktype(type TokenType) string {
	name := type.str()
	return name
}
|
|
|
|
// delimiter_token_types maps every recognised operator/punctuation spelling
// to its token type.
const delimiter_token_types = {
	// brackets
	'(':  TokenType.lparen
	')':  TokenType.rparen
	'[':  TokenType.lsqparen
	']':  TokenType.rsqparen
	'{':  TokenType.lbracket
	'}':  TokenType.rbracket
	// single-character operators
	'+':  TokenType.plus
	'-':  TokenType.minus
	'*':  TokenType.star
	'/':  TokenType.slash
	'=':  TokenType.equals
	'<':  TokenType.less
	'>':  TokenType.greater
	// punctuation
	'.':  TokenType.dot
	',':  TokenType.comma
	';':  TokenType.semicolon
	':':  TokenType.colon
	// two-character operators
	'==': TokenType.eq_eq
	'>=': TokenType.greater_eq
	'<=': TokenType.less_eq
	'!=': TokenType.not_eq
	'+=': TokenType.plus_eq
	'-=': TokenType.minus_eq
	'*=': TokenType.star_eq
	'/=': TokenType.slash_eq
	'++': TokenType.increment
	'--': TokenType.decrement
}

// toktype_from_delimiter returns the token type for the operator or
// punctuation text `delimiter`, or `.unknown` when the text is not recognised.
fn toktype_from_delimiter(delimiter string) TokenType {
	return delimiter_token_types[delimiter] or { TokenType.unknown }
}
|
|
|
|
// keyword_token_types maps every reserved word to its token type.
const keyword_token_types = {
	'let':    TokenType.kw_let
	'const':  TokenType.kw_const
	// built-in type names all share one token type
	'void':   TokenType.type
	'real':   TokenType.type
	'bool':   TokenType.type
	'int':    TokenType.type
	'string': TokenType.type
	'if':     TokenType.kw_if
	'else':   TokenType.kw_else
	'elif':   TokenType.kw_elif
	'for':    TokenType.kw_for
	'break':  TokenType.kw_break
	'fn':     TokenType.kw_fn
	'return': TokenType.kw_return
	// boolean literals
	'true':   TokenType.boolean
	'false':  TokenType.boolean
	'print':  TokenType.kw_print
	'class':  TokenType.kw_class
}

// toktype_from_kw returns the token type for the reserved word `kw`,
// or `.unknown` when `kw` is not a reserved word.
fn toktype_from_kw(kw string) TokenType {
	return keyword_token_types[kw] or { TokenType.unknown }
}
|
|
|
|
// is_delimiter reports whether byte `c` terminates the token being scanned.
//
// `is_inside_number` must be true while the current token started with a
// digit; in that mode `.` is treated as a decimal point rather than as a
// delimiter, so that `1.5` stays a single token.
//
// The delimiter set includes `!` so that the `!=` operator can be recognised,
// and tab / carriage return so that tab-separated or CRLF-terminated input is
// split the same way as space / LF input.
fn is_delimiter(c u8, is_inside_number bool) bool {
	if c == `.` {
		return !is_inside_number
	}
	always_delimiters := " \t\r\n!+-*/,;:%<>()[]{}=\""
	return always_delimiters.contains(c.ascii_str())
}
|
|
|
|
// is_real reports whether `str` has the form `<int>.<int>` with exactly one
// dot, where both sides satisfy `string.is_int()`.
fn is_real(str string) bool {
	whole, fraction := str.split_once('.') or { return false }
	// a second dot in the fractional part disqualifies the text
	if fraction.contains('.') {
		return false
	}
	if !whole.is_int() {
		return false
	}
	return fraction.is_int()
}
|
|
|
|
// is_keyword reports whether `str` is one of the language's reserved words
// (type names, control-flow keywords, declarations and boolean literals).
fn is_keyword(str string) bool {
	return str in [
		'void',
		'int',
		'real',
		'bool',
		'string',
		'if',
		'else',
		'elif',
		'for',
		'break',
		'fn',
		'return',
		'let',
		'const',
		'true',
		'false',
		'print',
		'class',
	]
}
|
|
|
|
// print_tok writes one token to stdout as its text (newlines shown as `\n`,
// formatted with width 8) followed by the token type name in parentheses.
fn print_tok(tok Token) {
	escaped := tok.text.replace("\n", "\\n")
	kind := str_from_toktype(tok.type)
	println("${escaped:8} (${kind})")
}
|
|
|
|
// print_toks prints every token in `toks`, one per line, in order.
fn print_toks(toks []Token) {
	for i in 0 .. toks.len {
		print_tok(toks[i])
	}
}
|
|
|
|
// lex splits `input` into tokens and returns them followed by a final
// `eof` token (text "EOF").
//
// Recognised tokens:
//   * string literals: text between two `"` characters (no escape handling);
//     the token text excludes the quotes
//   * operators / punctuation, preferring the two-character forms
//     (`==`, `>=`, `<=`, `!=`, `+=`, `-=`, `*=`, `/=`, `++`, `--`)
//   * words: keywords, integers, reals and identifiers
// Whitespace delimiters are skipped.
//
// Returns `none` after printing an error to stderr when a word is not a valid
// keyword / number / identifier, or when a string literal is never closed.
fn lex(input string) ?[]Token {
	two_char_ops := ['==', '>=', '<=', '!=', '+=', '-=', '*=', '/=', '++', '--']
	mut tokens := []Token{}
	mut line := 1
	mut pos := 0

	for pos < input.len {
		c := input[pos]

		// String literal: scan to the closing quote, never past the end of input.
		if c == `"` {
			start_line := line
			mut close_idx := pos + 1
			for close_idx < input.len && input[close_idx] != `"` {
				// keep the line counter accurate for multi-line strings
				if input[close_idx] == `\n` {
					line++
				}
				close_idx++
			}
			if close_idx >= input.len {
				eprintln(term.red('ERROR: unterminated string starting at line ' +
					start_line.str()))
				return none
			}
			tokens << Token{TokenType.string, input[pos + 1..close_idx]}
			pos = close_idx + 1
			continue
		}

		// A token that starts with a digit is scanned in "number mode", where
		// `.` is part of the token instead of a delimiter.
		in_number := c.is_digit()

		if is_delimiter(c, in_number) {
			if c == `\n` {
				line++
			}
			// whitespace only separates tokens; it produces none itself
			if c.is_space() {
				pos++
				continue
			}
			mut tok_str := c.ascii_str()
			if pos + 1 < input.len {
				pair := input[pos..pos + 2]
				if pair in two_char_ops {
					tok_str = pair
				}
			}
			tokens << Token{toktype_from_delimiter(tok_str), tok_str}
			pos += tok_str.len
			continue
		}

		// Word: extends to the next delimiter or to the end of input, so a
		// trailing word without a following delimiter is still emitted.
		mut word_end := pos
		for word_end < input.len && !is_delimiter(input[word_end], in_number) {
			word_end++
		}
		word := input[pos..word_end]

		if is_keyword(word) {
			tokens << Token{toktype_from_kw(word), word}
		} else if word.is_int() {
			tokens << Token{TokenType.integer, word}
		} else if is_real(word) {
			tokens << Token{TokenType.real, word}
		} else if word.is_identifier() {
			tokens << Token{TokenType.identifier, word}
		} else {
			eprintln(term.red("ERROR: found invalid token " + word + " at line " + line.str()))
			return none
		}
		pos = word_end
	}

	tokens << Token{TokenType.eof, "EOF"}
	return tokens
}
|