commit 38504582ae1cc6f9c6104d779b7cbb8f2367f5b0
Author: uan
Date:   Mon Feb 2 17:58:00 2026 +0100

    first commit

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..01072ca
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,8 @@
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.v]
+indent_style = tab
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..9a98968
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,8 @@
+* text=auto eol=lf
+*.bat eol=crlf
+
+*.v linguist-language=V
+*.vv linguist-language=V
+*.vsh linguist-language=V
+v.mod linguist-language=V
+.vdocignore linguist-language=ignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9a4f52a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# Binaries for programs and plugins
+main
+vlexer
+*.exe
+*.exe~
+*.so
+*.dylib
+*.dll
+
+# Ignore binary output folders
+bin/
+
+# Ignore common editor/system specific metadata
+.DS_Store
+.idea/
+.vscode/
+*.iml
+
+# ENV
+.env
+
+# vweb and database
+*.db
+*.js
diff --git a/lexer.v b/lexer.v
new file mode 100644
index 0000000..3757296
--- /dev/null
+++ b/lexer.v
@@ -0,0 +1,111 @@
+module main
+
+// TokenType enumerates the categories of token that `lex` can report.
+enum TokenType as u8 {
+	integer
+	real
+	operator
+	keyword
+	identifier
+	eof
+	unknown
+}
+
+// Token pairs a lexeme's text with the category it was classified as.
+struct Token {
+	type TokenType
+	text string
+}
+
+// str_from_toktype returns the display name printed for a token type.
+fn str_from_toktype(type TokenType) string {
+	return match type {
+		.integer {'integer'}
+		.real {'real'}
+		.operator {'operator'}
+		.keyword {'keyword'}
+		.identifier {'identifier'}
+		.eof {'EOF'}
+		.unknown {'unknown'}
+	}
+}
+
+// is_delimiter reports whether `c` ends the word being scanned: a space,
+// a newline, or one of the punctuation/operator characters listed below.
+fn is_delimiter(c u8) bool {
+	return " +-*/,;%<>()[]{}=\n".contains(c.ascii_str())
+}
+
+// is_operator reports whether `c` is one of the characters `+ - * / =`,
+// the only delimiters that `lex` emits as tokens of their own.
+fn is_operator(c u8) bool {
+	return "+-*/=".contains(c.ascii_str())
+}
+
+// is_real reports whether `str` consists of two parts joined by a single
+// `.`, each of which passes `is_int()` (for example `6.50`).
+fn is_real(str string) bool {
+	left, right := str.split_once(".") or {return false}
+	return !right.contains(".") && left.is_int() && right.is_int()
+}
+
+// is_keyword reports whether `str` is one of the reserved words.
+fn is_keyword(str string) bool {
+	return [
+		"void", "int", "real",
+		"if", "else", "while", "break", "fn", "return"
+	].contains(str)
+}
+
+// print_tok prints a token as its text (formatted with `:8`) followed by
+// the name of its type in parentheses.
+fn print_tok(tok Token) {
+	println("${tok.text:8} (${str_from_toktype(tok.type)})")
+}
+
+// classify_word builds the token for `word`, a non-empty run of
+// non-delimiter characters. The checks are ordered: keyword, integer,
+// real, identifier; anything that matches none of them is `unknown`.
+fn classify_word(word string) Token {
+	if is_keyword(word) {
+		return Token{TokenType.keyword, word}
+	}
+	if word.is_int() {
+		return Token{TokenType.integer, word}
+	}
+	if is_real(word) {
+		return Token{TokenType.real, word}
+	}
+	if word.is_identifier() {
+		return Token{TokenType.identifier, word}
+	}
+	return Token{TokenType.unknown, word}
+}
+
+// lex splits `input` into tokens and prints each one with `print_tok`,
+// followed by a final EOF token.
+//
+// Words are maximal runs of non-delimiter characters. Delimiters that
+// are operators are printed as operator tokens; all other delimiters
+// only separate words and produce no output.
+fn lex(input string) {
+	mut start := 0
+	// `pos` deliberately runs one past the last byte so the end of input
+	// closes the current word exactly like a delimiter does. Without this,
+	// a word not followed by a delimiter (e.g. the `x` in "real x") would
+	// never be printed.
+	for pos := 0; pos <= input.len; pos++ {
+		at_end := pos == input.len
+		if !at_end && !is_delimiter(input[pos]) {
+			continue
+		}
+		if start < pos {
+			print_tok(classify_word(input.substr(start, pos)))
+		}
+		if !at_end && is_operator(input[pos]) {
+			print_tok(Token{TokenType.operator, input[pos].ascii_str()})
+		}
+		start = pos + 1
+	}
+	print_tok(Token{TokenType.eof, "EOF"})
+}
diff --git a/main.v b/main.v
new file mode 100644
index 0000000..0b4cf19
--- /dev/null
+++ b/main.v
@@ -0,0 +1,7 @@
+module main
+
+fn main() {
+	content := "real x = 6.50;"
+	println("for string : \"" + content + "\"")
+	lex(content)
+}
diff --git a/v.mod b/v.mod
new file mode 100644
index 0000000..1e8b39c
--- /dev/null
+++ b/v.mod
@@ -0,0 +1,7 @@
+Module {
+	name: 'vlexer'
+	description: 'onelang lexer in v'
+	version: '0.0.0'
+	license: 'MIT'
+	dependencies: []
+}