more token types

This commit is contained in:
uan
2026-02-02 19:34:10 +01:00
parent 38504582ae
commit fbae7bc5d5
4 changed files with 99 additions and 24 deletions

107
lexer.v
View File

@@ -1,11 +1,30 @@
module main module main
import term
enum TokenType as u8 { enum TokenType as u8 {
integer integer
real real
operator
keyword keyword
identifier identifier
plus
minus
star
slash
equals
less
greater
lparen
rparen
lsqparen
rsqparen
lbracket
rbracket
dot
comma
semicolon
colon
newline
eof eof
unknown unknown
} }
@@ -15,47 +34,95 @@ struct Token {
text string text string
} }
fn str_from_toktype(type TokenType) string { fn str_from_toktype(type TokenType) string {
return match type { return match type {
.integer {'integer'} .integer {'integer'}
.real {'real'} .real {'real'}
.operator {'operator'}
.keyword {'keyword'} .keyword {'keyword'}
.identifier {'identifier'} .identifier {'identifier'}
.eof {'EOF'} .eof {'EOF'}
.unknown {'unknown'} .unknown {'unknown'}
.plus {'plus'}
.minus {'minus'}
.star {'star'}
.slash {'slash'}
.equals {'equals'}
.less {'less'}
.greater {'greater'}
.lparen {'lparen'}
.rparen {'rparen'}
.lsqparen {'lsqparen'}
.rsqparen {'rsqparen'}
.lbracket {'lbracket'}
.rbracket {'rbracket'}
.dot {'dot'}
.comma {'comma'}
.semicolon {'semicolon'}
.colon {'colon'}
.newline {'newline'}
}
}
fn toktype_from_delimiter(delimiter string) TokenType {
return match delimiter {
'(' {.lparen}
')' {.rparen}
'[' {.lsqparen}
']' {.rsqparen}
'{' {.lbracket}
'}' {.rbracket}
'+' {.plus}
'-' {.minus}
'*' {.star}
'/' {.slash}
'=' {.equals}
'<' {.less}
'>' {.greater}
'.' {.dot}
',' {.comma}
';' {.semicolon}
':' {.colon}
'\n' {.newline}
else {.unknown}
} }
} }
fn is_delimiter(c u8) bool { fn is_delimiter(c u8) bool {
return " +-*/,;%<>()[]{}=\n".contains(c.ascii_str()) return " +-*/.,;:%<>()[]{}=\n".contains(c.ascii_str())
}
fn is_operator(c u8) bool {
return "+-*/=".contains(c.ascii_str())
} }
fn is_real(str string) bool { fn is_real(str string) bool {
left, right := str.split_once(".") or {return false} left, right := str.split_once(".") or {return false}
return !right.contains(".") && left.is_int() && right.is_int() return !right.contains(".") && left.is_int() && right.is_int()
} }
fn is_keyword(str string) bool { fn is_keyword(str string) bool {
return [ return [
"void", "int", "real", "if", "else", "while", "break", "fn", "return" "void", "int", "real", "bool", "if", "else", "for", "break", "fn", "return"
].contains(str) ].contains(str)
} }
fn print_tok(tok Token) { fn print_tok(tok Token) {
println("${tok.text:8} (${str_from_toktype(tok.type)})") println("${tok.text.replace("\n", "\\n"):8} (${str_from_toktype(tok.type)})")
} }
fn lex(input string) { fn print_toks(toks []Token) {
for tok in toks {
print_tok(tok)
}
}
fn lex(input string) ?[]Token {
mut left := 0 mut left := 0
mut right := 0 mut right := 0
mut line := 1
mut tokens := []Token{}
for (right < input.len && left <= right) { for (right < input.len && left <= right) {
if input[right] == `\n` {
line++
}
if !is_delimiter(input[right]) { if !is_delimiter(input[right]) {
right++ right++
} }
@@ -63,8 +130,8 @@ fn lex(input string) {
break break
} }
if is_delimiter(input[right]) && left == right { if is_delimiter(input[right]) && left == right {
if is_operator(input[right]) { if input[right] != ` ` {
print_tok(Token{TokenType.operator, input[right].ascii_str()}) tokens << Token{toktype_from_delimiter(input[right].ascii_str()), input[right].ascii_str()}
} }
right++ right++
left = right left = right
@@ -72,18 +139,20 @@ fn lex(input string) {
else if (is_delimiter(input[right]) && left != right) || (right == input.len && left != right) { else if (is_delimiter(input[right]) && left != right) || (right == input.len && left != right) {
subs := input.substr(left, right) subs := input.substr(left, right)
if is_keyword(subs) { if is_keyword(subs) {
print_tok(Token{TokenType.keyword, subs}) tokens << Token{TokenType.keyword, subs}
} else if subs.is_int() { } else if subs.is_int() {
print_tok(Token{TokenType.integer, subs}) tokens << Token{TokenType.integer, subs}
} else if is_real(subs) { } else if is_real(subs) {
print_tok(Token{TokenType.real, subs}) tokens << Token{TokenType.real, subs}
} else if subs.is_identifier() { } else if subs.is_identifier() {
print_tok(Token{TokenType.identifier, subs}) tokens << Token{TokenType.identifier, subs}
} else if !subs.is_identifier() && !is_delimiter(input[right-1]) { } else if !subs.is_identifier() && !is_delimiter(input[right-1]) {
print_tok(Token{TokenType.unknown, subs}) eprintln(term.red("ERROR: found invalid token " + subs + " at line " + line.str()))
return none
} }
left = right left = right
} }
} }
print_tok(Token{TokenType.eof, "EOF"}) tokens << Token{TokenType.eof, "EOF"}
return tokens
} }

9
main.v
View File

@@ -1,7 +1,10 @@
module main module main
import os
fn main() { fn main() {
content := "real x = 6.50;" content := os.read_file("test.one") or { return }
println("for string : \"" + content + "\"") println("---------\n" + content + "---------")
lex(content) tokens := lex(content) or { return }
print_toks(tokens)
} }

3
test.one Normal file
View File

@@ -0,0 +1,3 @@
fn bool singledigit(int x) {
return x < 10;
}

4
v.mod
View File

@@ -1,6 +1,6 @@
Module { Module {
name: 'vlexer' name: 'onev'
description: 'onelang lexer in v' description: 'onelang compiler in v'
version: '0.0.0' version: '0.0.0'
license: 'MIT' license: 'MIT'
dependencies: [] dependencies: []