module main

import term

// Characters that terminate a word. Every one except the space also becomes
// a single-character token of its own.
// NOTE(review): `%` is listed here but has no TokenType, so it is emitted as
// `.unknown` by toktype_from_delimiter — confirm whether that is intended.
const delimiters = ' +-*/.,;:%<>()[]{}=\n'

// Reserved words of the language being lexed.
const keywords = ['void', 'int', 'real', 'bool', 'if', 'else', 'for', 'break', 'fn', 'return']

// TokenType enumerates every category of token the lexer can emit.
enum TokenType as u8 {
	integer
	real
	keyword
	identifier
	plus
	minus
	star
	slash
	equals
	less
	greater
	lparen
	rparen
	lsqparen
	rsqparen
	lbracket
	rbracket
	dot
	comma
	semicolon
	colon
	newline
	eof
	unknown
}

// Token pairs a token category with the exact source text it was built from.
struct Token {
	type TokenType
	text string
}

// str_from_toktype returns the printable name of a token type.
fn str_from_toktype(type TokenType) string {
	return match type {
		.integer { 'integer' }
		.real { 'real' }
		.keyword { 'keyword' }
		.identifier { 'identifier' }
		.eof { 'EOF' }
		.unknown { 'unknown' }
		.plus { 'plus' }
		.minus { 'minus' }
		.star { 'star' }
		.slash { 'slash' }
		.equals { 'equals' }
		.less { 'less' }
		.greater { 'greater' }
		.lparen { 'lparen' }
		.rparen { 'rparen' }
		.lsqparen { 'lsqparen' }
		.rsqparen { 'rsqparen' }
		.lbracket { 'lbracket' }
		.rbracket { 'rbracket' }
		.dot { 'dot' }
		.comma { 'comma' }
		.semicolon { 'semicolon' }
		.colon { 'colon' }
		.newline { 'newline' }
	}
}

// toktype_from_delimiter maps a one-character delimiter string to its token
// type. Anything without a dedicated type (including ' ' and '%') yields
// `.unknown`.
fn toktype_from_delimiter(delimiter string) TokenType {
	return match delimiter {
		'(' { .lparen }
		')' { .rparen }
		'[' { .lsqparen }
		']' { .rsqparen }
		'{' { .lbracket }
		'}' { .rbracket }
		'+' { .plus }
		'-' { .minus }
		'*' { .star }
		'/' { .slash }
		'=' { .equals }
		'<' { .less }
		'>' { .greater }
		'.' { .dot }
		',' { .comma }
		';' { .semicolon }
		':' { .colon }
		'\n' { .newline }
		else { .unknown }
	}
}

// is_delimiter reports whether `c` ends the word currently being scanned.
fn is_delimiter(c u8) bool {
	return delimiters.contains(c.ascii_str())
}

// is_real reports whether `str` has the shape `<int>.<int>`, e.g. "3.14".
// Strings without a dot, or with more than one, are rejected.
fn is_real(str string) bool {
	left, right := str.split_once('.') or { return false }
	// `right.is_int()` already fails if a second dot is present.
	return left.is_int() && right.is_int()
}

// is_keyword reports whether `str` is one of the reserved words.
fn is_keyword(str string) bool {
	return keywords.contains(str)
}

// print_tok writes one token to stdout as "<text padded to 8> (<type name>)".
// A literal newline in the text is shown as the two characters `\n`.
fn print_tok(tok Token) {
	escaped := tok.text.replace('\n', '\\n')
	println('${escaped:8} (${str_from_toktype(tok.type)})')
}

// print_toks prints every token of `toks` in order, one per line.
fn print_toks(toks []Token) {
	for tok in toks {
		print_tok(tok)
	}
}

// classify_word decides which token type a delimiter-free word belongs to.
// Returns `none` when the word is not a keyword, number or identifier.
fn classify_word(word string) ?TokenType {
	if is_keyword(word) {
		return TokenType.keyword
	}
	if word.is_int() {
		return TokenType.integer
	}
	if is_real(word) {
		return TokenType.real
	}
	if word.is_identifier() {
		return TokenType.identifier
	}
	return none
}

// scan_word returns the index one past the end of the word starting at
// `start`. `input[start]` must not be a delimiter.
// A single '.' is kept inside the word when it sits between digits, so that
// "1.5" is scanned as one word; in every other position '.' ends the word
// ("a.b" and "1.x" still split at the dot).
fn scan_word(input string, start int) int {
	mut pos := start
	mut seen_dot := false
	for pos < input.len {
		c := input[pos]
		if c == `.` && !seen_dot && pos + 1 < input.len && input[pos + 1].is_digit()
			&& input[start..pos].is_int() {
			seen_dot = true
			pos++
			continue
		}
		if is_delimiter(c) {
			break
		}
		pos++
	}
	return pos
}

// lex splits `input` into tokens and appends a final `.eof` token.
//
// Spaces are skipped; every other delimiter becomes its own token. Runs of
// non-delimiter characters are classified as keyword, integer, real or
// identifier. If such a run is none of these, an error naming the word and
// its 1-based line number is printed to stderr and `none` is returned.
//
// Fixes over the previous implementation:
//   * a word at the very end of the input (no trailing delimiter) is no
//     longer dropped;
//   * each newline is counted exactly once, and only after the text before
//     it has been processed, so reported line numbers are correct;
//   * real literals such as "1.5" are now recognised instead of being split
//     into integer, dot, integer.
fn lex(input string) ?[]Token {
	mut tokens := []Token{}
	mut line := 1
	mut pos := 0
	for pos < input.len {
		c := input[pos]
		if is_delimiter(c) {
			if c != ` ` {
				text := c.ascii_str()
				tokens << Token{toktype_from_delimiter(text), text}
			}
			if c == `\n` {
				line++
			}
			pos++
			continue
		}
		end := scan_word(input, pos)
		word := input[pos..end]
		kind := classify_word(word) or {
			eprintln(term.red('ERROR: found invalid token ' + word + ' at line ' + line.str()))
			return none
		}
		tokens << Token{kind, word}
		pos = end
	}
	tokens << Token{TokenType.eof, 'EOF'}
	return tokens
}