// Lexer for a small toy language: converts source text into a flat token
// stream (keywords, literals, identifiers, operators, delimiters),
// always terminated by an `eof` token.
module main

import term

// Every token category the lexer can produce.
enum TokenType as u8 {
	kw_let
	kw_const
	type
	kw_if
	kw_else
	kw_for
	kw_break
	kw_fn
	kw_return
	kw_print
	kw_struct
	integer
	real
	boolean
	identifier
	plus
	minus
	star
	slash
	equals
	less
	greater
	eq_eq
	greater_eq
	less_eq
	not_eq
	plus_eq
	minus_eq
	star_eq
	slash_eq
	increment
	decrement
	lparen
	rparen
	lsqparen
	rsqparen
	lbracket
	rbracket
	dot
	comma
	semicolon
	colon
	eof
	unknown
}

// One lexed token: its category plus the exact source text it was read from.
struct Token {
	type TokenType
	text string
}

// Human-readable name of a token type (used when printing tokens).
fn str_from_toktype(type TokenType) string {
	return type.str()
}

// Maps a one- or two-character operator/delimiter string to its token type.
// Unrecognized strings map to `.unknown`.
fn toktype_from_delimiter(delimiter string) TokenType {
	return match delimiter {
		'(' { .lparen }
		')' { .rparen }
		'[' { .lsqparen }
		']' { .rsqparen }
		'{' { .lbracket }
		'}' { .rbracket }
		'+' { .plus }
		'-' { .minus }
		'*' { .star }
		'/' { .slash }
		'=' { .equals }
		'<' { .less }
		'>' { .greater }
		'.' { .dot }
		',' { .comma }
		';' { .semicolon }
		':' { .colon }
		'==' { .eq_eq }
		'>=' { .greater_eq }
		'<=' { .less_eq }
		'!=' { .not_eq }
		'+=' { .plus_eq }
		'-=' { .minus_eq }
		'*=' { .star_eq }
		'/=' { .slash_eq }
		'++' { .increment }
		'--' { .decrement }
		else { .unknown }
	}
}

// Maps a keyword string to its token type. The four type names share one
// `.type` category; `true`/`false` share `.boolean`. Non-keywords map to
// `.unknown`.
fn toktype_from_kw(kw string) TokenType {
	return match kw {
		'let' { .kw_let }
		'const' { .kw_const }
		'void', 'real', 'bool', 'int' { .type }
		'if' { .kw_if }
		'else' { .kw_else }
		'for' { .kw_for }
		'break' { .kw_break }
		'fn' { .kw_fn }
		'return' { .kw_return }
		'true', 'false' { .boolean }
		'print' { .kw_print }
		'struct' { .kw_struct }
		else { .unknown }
	}
}

// Reports whether `c` ends the current token. A '.' only counts as a
// delimiter outside a numeric literal, so reals like `1.5` stay whole.
// NOTE(review): '%' is listed here but has no token type, so `%` input
// currently lexes as `.unknown` — confirm whether modulo is intended.
fn is_delimiter(c u8, is_inside_number bool) bool {
	return " +-*/.,;:%<>()[]{}=\n".contains(c.ascii_str()) && (c.ascii_str() != '.'
		|| !is_inside_number)
}

// Reports whether `str` is a real literal: exactly one '.' with an
// integer on each side (so `1.`, `.5` and `1.2.3` are rejected).
fn is_real(str string) bool {
	left, right := str.split_once(".") or { return false }
	return !right.contains(".") && left.is_int() && right.is_int()
}

// Reports whether `str` is a reserved word of the language.
fn is_keyword(str string) bool {
	return ["void", "int", "real", "bool", "if", "else", "for", "break", "fn", "return", "let",
		"const", "true", "false", "print", "struct"].contains(str)
}

// Prints a single token as `text (type)`, with newlines escaped so the
// output stays one line per token.
fn print_tok(tok Token) {
	println("${tok.text.replace("\n", "\\n"):8} (${str_from_toktype(tok.type)})")
}

// Prints every token in the stream, one per line.
fn print_toks(toks []Token) {
	for tok in toks {
		print_tok(tok)
	}
}

// Classifies a completed word lexeme as a keyword, integer, real, or
// identifier. Returns `none` when the text fits no category (caller
// decides whether that is an error).
fn token_from_word(subs string) ?Token {
	if is_keyword(subs) {
		return Token{toktype_from_kw(subs), subs}
	}
	if subs.is_int() {
		return Token{TokenType.integer, subs}
	}
	if is_real(subs) {
		return Token{TokenType.real, subs}
	}
	if subs.is_identifier() {
		return Token{TokenType.identifier, subs}
	}
	return none
}

// Lexes `input` into a token stream ending with `eof`. On the first
// invalid token an error naming the token and its line is printed and
// `none` is returned. `left..right` brackets the word being scanned;
// `left == right` means the cursor sits on a delimiter.
fn lex(input string) ?[]Token {
	mut left := 0
	mut right := 0
	mut line := 1
	mut tokens := []Token{}
	mut is_inside_number := false
	for right < input.len && left <= right {
		if !is_delimiter(input[right], is_inside_number) {
			// A token starting with a digit is numeric, so '.' inside it must
			// not split it (see is_delimiter).
			// BUG FIX: was `input[left].str().is_int()` — u8.str() renders the
			// byte's numeric value (e.g. "97"), which is always an int, so the
			// flag was stuck true and `foo.bar` failed to lex.
			is_inside_number = input[left].is_digit()
			right++
		}
		if right >= input.len {
			break
		}
		if is_delimiter(input[right], is_inside_number) && left == right {
			// BUG FIX: count the newline here, where each delimiter is visited
			// exactly once. Counting at loop top double-counted a '\n' that
			// terminated a word (flush pass + consume pass).
			if input[right] == `\n` {
				line++
			}
			if !input[right].is_space() {
				mut tok_str := input[right].ascii_str()
				// Greedily prefer a two-character operator when one matches.
				if right + 1 < input.len {
					combined := input.substr(right, right + 2)
					if combined in ['==', '>=', '<=', '!=', '+=', '-=', '*=', '/=', '++', '--'] {
						tok_str = combined
						right++
					}
				}
				tokens << Token{toktype_from_delimiter(tok_str), tok_str}
			}
			right++
			left = right
		} else if is_delimiter(input[right], is_inside_number) && left != right {
			subs := input.substr(left, right)
			if tok := token_from_word(subs) {
				tokens << tok
			} else if !is_delimiter(input[right - 1], is_inside_number) {
				// token_from_word returned none, so subs.is_identifier() is
				// already false — this matches the original error guard.
				eprintln(term.red("ERROR: found invalid token " + subs + " at line " + line.str()))
				return none
			}
			left = right
		}
	}
	// BUG FIX: flush a word that runs to end-of-input. The original clause
	// `right == input.len && left != right` was unreachable (the break above
	// fired first), so `let x` with no trailing newline silently lost `x`.
	if left < right {
		subs := input.substr(left, right)
		if tok := token_from_word(subs) {
			tokens << tok
		} else {
			eprintln(term.red("ERROR: found invalid token " + subs + " at line " + line.str()))
			return none
		}
	}
	tokens << Token{TokenType.eof, "EOF"}
	return tokens
}