diff --git a/lexer.v b/lexer.v index 3757296..0f9d203 100644 --- a/lexer.v +++ b/lexer.v @@ -1,11 +1,30 @@ module main +import term + enum TokenType as u8 { integer real - operator keyword identifier + plus + minus + star + slash + equals + less + greater + lparen + rparen + lsqparen + rsqparen + lbracket + rbracket + dot + comma + semicolon + colon + newline eof unknown } @@ -15,47 +34,95 @@ struct Token { text string } + fn str_from_toktype(type TokenType) string { return match type { .integer {'integer'} .real {'real'} - .operator {'operator'} .keyword {'keyword'} .identifier {'identifier'} .eof {'EOF'} .unknown {'unknown'} + .plus {'plus'} + .minus {'minus'} + .star {'star'} + .slash {'slash'} + .equals {'equals'} + .less {'less'} + .greater {'greater'} + .lparen {'lparen'} + .rparen {'rparen'} + .lsqparen {'lsqparen'} + .rsqparen {'rsqparen'} + .lbracket {'lbracket'} + .rbracket {'rbracket'} + .dot {'dot'} + .comma {'comma'} + .semicolon {'semicolon'} + .colon {'colon'} + .newline {'newline'} + } +} + +fn toktype_from_delimiter(delimiter string) TokenType { + return match delimiter { + '(' {.lparen} + ')' {.rparen} + '[' {.lsqparen} + ']' {.rsqparen} + '{' {.lbracket} + '}' {.rbracket} + '+' {.plus} + '-' {.minus} + '*' {.star} + '/' {.slash} + '=' {.equals} + '<' {.less} + '>' {.greater} + '.' 
{.dot} + ',' {.comma} + ';' {.semicolon} + ':' {.colon} + '\n' {.newline} + else {.unknown} } } fn is_delimiter(c u8) bool { - return " +-*/,;%<>()[]{}=\n".contains(c.ascii_str()) -} - -fn is_operator(c u8) bool { - return "+-*/=".contains(c.ascii_str()) + return " +-*/.,;:%<>()[]{}=\n".contains(c.ascii_str()) } fn is_real(str string) bool { - left, right := str.split_once(".") or {return false} return !right.contains(".") && left.is_int() && right.is_int() } fn is_keyword(str string) bool { return [ - "void", "int", "real", "if", "else", "while", "break", "fn", "return" + "void", "int", "real", "bool", "if", "else", "for", "break", "fn", "return" ].contains(str) } fn print_tok(tok Token) { - println("${tok.text:8} (${str_from_toktype(tok.type)})") + println("${tok.text.replace("\n", "\\n"):8} (${str_from_toktype(tok.type)})") } -fn lex(input string) { +fn print_toks(toks []Token) { + for tok in toks { + print_tok(tok) + } +} + +fn lex(input string) ?[]Token { mut left := 0 mut right := 0 + mut line := 1 + mut tokens := []Token{} for (right < input.len && left <= right) { + if input[right] == `\n` { + line++ + } if !is_delimiter(input[right]) { right++ } @@ -63,8 +130,8 @@ fn lex(input string) { break } if is_delimiter(input[right]) && left == right { - if is_operator(input[right]) { - print_tok(Token{TokenType.operator, input[right].ascii_str()}) + if input[right] != ` ` { + tokens << Token{toktype_from_delimiter(input[right].ascii_str()), input[right].ascii_str()} } right++ left = right @@ -72,18 +139,20 @@ fn lex(input string) { else if (is_delimiter(input[right]) && left != right) || (right == input.len && left != right) { subs := input.substr(left, right) if is_keyword(subs) { - print_tok(Token{TokenType.keyword, subs}) + tokens << Token{TokenType.keyword, subs} } else if subs.is_int() { - print_tok(Token{TokenType.integer, subs}) + tokens << Token{TokenType.integer, subs} } else if is_real(subs) { - print_tok(Token{TokenType.real, subs}) + tokens << 
Token{TokenType.real, subs} } else if subs.is_identifier() { - print_tok(Token{TokenType.identifier, subs}) + tokens << Token{TokenType.identifier, subs} } else if !subs.is_identifier() && !is_delimiter(input[right-1]) { - print_tok(Token{TokenType.unknown, subs}) + eprintln(term.red("ERROR: found invalid token " + subs + " at line " + line.str())) + return none } left = right } } - print_tok(Token{TokenType.eof, "EOF"}) + tokens << Token{TokenType.eof, "EOF"} + return tokens } diff --git a/main.v b/main.v index 0b4cf19..e8bd6c9 100644 --- a/main.v +++ b/main.v @@ -1,7 +1,10 @@ module main +import os + fn main() { - content := "real x = 6.50;" - println("for string : \"" + content + "\"") - lex(content) + content := os.read_file("test.one") or { return } + println("---------\n" + content + "---------") + tokens := lex(content) or { return } + print_toks(tokens) } diff --git a/test.one b/test.one new file mode 100644 index 0000000..0a36409 --- /dev/null +++ b/test.one @@ -0,0 +1,3 @@ +fn bool singledigit(int x) { + return x < 10; +} diff --git a/v.mod b/v.mod index 1e8b39c..d22950c 100644 --- a/v.mod +++ b/v.mod @@ -1,6 +1,6 @@ Module { - name: 'vlexer' - description: 'onelang lexer in v' + name: 'onev' + description: 'onelang compiler in v' version: '0.0.0' license: 'MIT' dependencies: []