forked from trinitrix/core

feat(trixy-lang_parser): Add a lexer with error handling for trixy code

This commit is contained in:
Benedikt Peetz 2023-12-16 11:45:23 +01:00
parent cd2dbc516a
commit 3da75f6913
Signed by: bpeetz
GPG Key ID: A5E94010C3A642AD
12 changed files with 798 additions and 0 deletions
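For orientation, here is a minimal, hypothetical sketch of how the new lexer is meant to be driven; it mirrors the helper binary and the tests further down in this diff and is not itself part of the commit:

use trixy_lang_parser::lexing::TokenStream;

fn main() {
    let src = "
nasp commands {
    fn expect(event: String) -> String;
}
";
    match TokenStream::lex(src) {
        // A TokenStream bundles the produced tokens with the original source text
        Ok(tokens) => println!("{:#?}", tokens),
        // A SpannedLexingError renders an annotated, rustc-style error report
        Err(err) => eprintln!("{}", err),
    }
}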

trixy/trixy-lang_parser/.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
# build
/target
/result
# lua_macros is a library
Cargo.lock

View File

@@ -0,0 +1,11 @@
[package]
name = "trixy-lang_parser"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = { version = "4.4.11", features = ["derive"] }
pretty_assertions = "1.4.0"
thiserror = "1.0.50"

View File

@@ -0,0 +1,9 @@
fn print(message: CommandTransferValue);
nasp trinitrix {
fn hi(name: String) -> String;
}
// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
// vim: syntax=rust

View File

@@ -0,0 +1,11 @@
fn print(message: CommandTransferValue);
nasp trinitrix {
fn hi(name: String) -> String;
}
namespace commands { >-
}
// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
// vim: syntax=rust

View File

@@ -0,0 +1,36 @@
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct CommandSpec {
pub(crate) declarations: Vec<Declaration>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Declaration {
pub(crate) namespace: Vec<Namespace>,
pub(crate) genus: Genus,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Namespace {
pub(crate) name: String,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum Genus {
Function {
name: String,
inputs: Vec<NamedType>,
output: Type,
},
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct NamedType {
pub(crate) name: String,
pub(crate) base: Type,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum Type {
String,
Void,
}
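As an illustrative, crate-internal sketch (not part of this commit), a single declaration such as fn hi(name: String) -> String; inside nasp trinitrix { ... } is expected to populate these types as follows; this mirrors the test at the bottom of the diff:

fn _example_ast() -> CommandSpec {
    CommandSpec {
        declarations: vec![Declaration {
            // The enclosing nasp trinitrix { ... } block contributes the namespace path
            namespace: vec![Namespace {
                name: "trinitrix".to_owned(),
            }],
            // The function declaration itself becomes a Function genus
            genus: Genus::Function {
                name: "hi".to_owned(),
                inputs: vec![NamedType {
                    name: "name".to_owned(),
                    base: Type::String,
                }],
                output: Type::String,
            },
        }],
    }
}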

View File

@@ -0,0 +1,9 @@
use thiserror::Error;
use crate::lexing::error::SpannedLexingError;
#[derive(Error, Debug)]
pub enum TrixyError {
#[error(transparent)]
Parsing(#[from] SpannedLexingError),
}

View File

@@ -0,0 +1,100 @@
use std::{error::Error, fmt::Display};
use thiserror::Error;
#[derive(Error, Debug)]
pub enum LexingError {
#[error("No matches were found")]
NoMatchesTaken,
#[error("Expected an token, but reached end of file")]
UnexpectedEOF,
#[error("Char ('{0}') is not a know token!")]
UnknownCharacter(char),
#[error("The Arrow token must be of the form: ->")]
ExpectedArrow,
}
#[derive(Debug)]
pub enum SpannedLexingError {
Error {
source: LexingError,
/// The starting char index of the error in the source file
start: usize,
/// The starting char index of the error in the context line
contexted_start: usize,
/// The line above the error
line_above: String,
/// The line below the error
line_below: String,
/// The line in which the error occurred
line: String,
/// The line number of the main error line
line_number: usize,
},
}
impl Error for SpannedLexingError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
let Self::Error { source, .. } = self;
Some(source)
}
}
impl Display for SpannedLexingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self::Error {
source,
line_above,
line_below,
line,
line_number,
contexted_start,
..
} = self;
let error_line = {
let mut output = String::new();
output.push_str("\x1b[92;1m");
for _ in 0..(*contexted_start) {
output.push(' ');
}
line_number.to_string().chars().for_each(|_| {
output.push(' ');
});
output.push('^');
for _ in *contexted_start..(line.len() - 1) {
output.push('-');
}
output.push(' ');
let appended_str = match source {
LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
LexingError::UnexpectedEOF => "This EOF was completely unexpected".to_owned(),
LexingError::UnknownCharacter(char) => format!("This char: `{char}`; is not a valid token"),
LexingError::ExpectedArrow => "The `-` token is interpreted as the start of an arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(),
};
output.push_str(&appended_str);
output.push_str("\x1b[0m");
output
};
writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", source)?;
if !line_above.is_empty() {
writeln!(
f,
"\x1b[32;1m{} |\x1b[0m {}",
line_number - 1,
line_above
)?;
}
writeln!(f, "\x1b[36;1m{} |\x1b[0m {}", line_number, line)?;
writeln!(f, " {}", error_line)?;
if !line_below.is_empty() {
writeln!(
f,
"\x1b[32;1m{} |\x1b[0m {}",
line_number + 1,
line_below
)
} else {
write!(f, "")
}
}
}
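A small sketch (not part of the commit, and assuming access to TokenStream::lex from the sibling lexing module) of how this spanned error is surfaced; it matches the failing-lexing test further down:

let broken = "
nasp trinitrix {
    nasp - commands {
    }
}
";
if let Err(err) = TokenStream::lex(broken) {
    // Prints the annotated, ANSI-coloured report assembled by the Display impl above
    eprintln!("{}", err);
}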

View File

@@ -0,0 +1,84 @@
use self::{error::SpannedLexingError, tokenizer::Tokenizer};
pub mod error;
mod tokenizer;
#[cfg(test)]
mod test;
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
pub struct TokenStream {
original_file: String,
tokens: Vec<Token>,
}
impl TokenStream {
/// Turn a string of valid Trixy code into a list of tokens, including the
/// location of that token's start and end point in the original source code.
///
/// Note the token indices represent the half-open interval `[start, end)`,
/// equivalent to `start .. end` in Rust.
pub fn lex(src: &str) -> Result<Self, SpannedLexingError> {
let mut tokenizer = Tokenizer::new(src);
let mut tokens = Vec::new();
while let Some(tok) = tokenizer.next_token()? {
tokens.push(tok);
}
Ok(Self {
tokens,
original_file: src.to_owned(),
})
}
}
/// A token span is recorded in chars, counted from the beginning of the file.
/// For example, a token span like this:
/// ```no_run
/// TokenSpan {
/// start: 20,
/// end: 23,
/// }
/// ```
/// signals that the token starts at the 20th char in the source file and ends just before the 23rd.
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
pub struct TokenSpan {
/// The start of the token span
start: usize,
/// The end of the token span
end: usize,
}
/// A Token
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
pub struct Token {
/// The token's original location in the source file
span: TokenSpan,
kind: TokenKind,
}
/// Possible kinds of tokens
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
pub enum TokenKind {
Keyword(Keyword),
Identifier(String),
Colon,
Semicolon,
Comma,
Arrow,
BraceOpen,
BraceClose,
ParenthesisOpen,
ParenthesisClose,
}
/// Keywords used in the language
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
pub enum Keyword {
/// Start a namespace declaration
#[allow(non_camel_case_types)]
nasp,
/// Start a function declaration
#[allow(non_camel_case_types)]
r#fn,
}

View File

@@ -0,0 +1,194 @@
use crate::lexing::{Keyword, Token, TokenKind, TokenSpan};
use super::TokenStream;
use pretty_assertions::assert_eq;
#[test]
fn test_lexing_trixy() {
let input = "
nasp commands {
fn expect(event: String) -> String;
}
";
let token_stream = TokenStream::lex(input).unwrap();
let expected_token_stream = {
let tokens = vec![
Token {
span: TokenSpan { start: 1, end: 5 },
kind: TokenKind::Keyword(Keyword::nasp),
},
Token {
span: TokenSpan { start: 6, end: 14 },
kind: TokenKind::Identifier("commands".to_owned()),
},
Token {
span: TokenSpan { start: 15, end: 16 },
kind: TokenKind::BraceOpen,
},
Token {
span: TokenSpan { start: 21, end: 23 },
kind: TokenKind::Keyword(Keyword::r#fn),
},
Token {
span: TokenSpan { start: 24, end: 30 },
kind: TokenKind::Identifier("expect".to_owned()),
},
Token {
span: TokenSpan { start: 30, end: 31 },
kind: TokenKind::ParenthesisOpen,
},
Token {
span: TokenSpan { start: 31, end: 36 },
kind: TokenKind::Identifier("event".to_owned()),
},
Token {
span: TokenSpan { start: 36, end: 37 },
kind: TokenKind::Colon,
},
Token {
span: TokenSpan { start: 38, end: 44 },
kind: TokenKind::Identifier("String".to_owned()),
},
Token {
span: TokenSpan { start: 44, end: 45 },
kind: TokenKind::ParenthesisClose,
},
Token {
span: TokenSpan { start: 46, end: 48 },
kind: TokenKind::Arrow,
},
Token {
span: TokenSpan { start: 49, end: 55 },
kind: TokenKind::Identifier("String".to_owned()),
},
Token {
span: TokenSpan { start: 55, end: 56 },
kind: TokenKind::Semicolon,
},
Token {
span: TokenSpan { start: 57, end: 58 },
kind: TokenKind::BraceClose,
},
];
TokenStream {
tokens,
original_file: input.to_owned(),
}
};
assert_eq!(token_stream, expected_token_stream)
}
#[test]
fn test_failing_lexing() {
let input = "
nasp trinitrix {
nasp - commands {
fn hi(strings: String) -> String;
}
}
";
let token_stream = TokenStream::lex(input);
eprintln!("{}", token_stream.as_ref().unwrap_err());
// uncomment the next line to see the error message, without having to remove cargo's output filter
// assert!(!token_stream.is_err());
assert!(token_stream.is_err());
}
#[test]
fn test_multiple_tokens() {
let input = "
nasp nasp {{
}}
";
let token_stream = TokenStream::lex(input).unwrap();
let expected_token_stream = {
let tokens = vec![
Token {
span: TokenSpan { start: 1, end: 5 },
kind: TokenKind::Keyword(Keyword::nasp),
},
Token {
span: TokenSpan { start: 6, end: 10 },
kind: TokenKind::Keyword(Keyword::nasp),
},
Token {
span: TokenSpan { start: 11, end: 12 },
kind: TokenKind::BraceOpen,
},
Token {
span: TokenSpan { start: 12, end: 13 },
kind: TokenKind::BraceOpen,
},
Token {
span: TokenSpan { start: 14, end: 15 },
kind: TokenKind::BraceClose,
},
Token {
span: TokenSpan { start: 15, end: 16 },
kind: TokenKind::BraceClose,
},
];
TokenStream {
tokens,
original_file: input.to_owned(),
}
};
assert_eq!(token_stream, expected_token_stream)
}
#[test]
fn test_comments() {
let input = "
// Some comment
nasp nasp {{
}}
// NOTE(@soispha): We do not support nested multi line comments <2023-12-16>
/* Some
* multi
* line
* comment
*/
";
let token_stream = TokenStream::lex(input)
.map_err(|e| {
eprintln!("{}", e);
panic!();
})
.unwrap();
let expected_token_stream = {
let tokens = vec![
Token {
span: TokenSpan { start: 33, end: 37 },
kind: TokenKind::Keyword(Keyword::nasp),
},
Token {
span: TokenSpan { start: 38, end: 42 },
kind: TokenKind::Keyword(Keyword::nasp),
},
Token {
span: TokenSpan { start: 43, end: 44 },
kind: TokenKind::BraceOpen,
},
Token {
span: TokenSpan { start: 44, end: 45 },
kind: TokenKind::BraceOpen,
},
Token {
span: TokenSpan { start: 55, end: 56 },
kind: TokenKind::BraceClose,
},
Token {
span: TokenSpan { start: 56, end: 57 },
kind: TokenKind::BraceClose,
},
];
TokenStream {
tokens,
original_file: input.to_owned(),
}
};
assert_eq!(token_stream, expected_token_stream)
}

View File

@@ -0,0 +1,235 @@
// This code is heavily inspired by: https://michael-f-bryan.github.io/static-analyser-in-rust/book/lex.html
use crate::lexing::{Keyword, TokenSpan};
use super::{
error::{LexingError, SpannedLexingError},
Token, TokenKind,
};
pub(super) struct Tokenizer<'a> {
current_index: usize,
remaining_text: &'a str,
original_text: &'a str,
}
impl<'a> Tokenizer<'a> {
pub(super) fn new(input: &'a str) -> Self {
Self {
current_index: 0,
remaining_text: input,
original_text: input,
}
}
pub(super) fn next_token(&mut self) -> Result<Option<Token>, SpannedLexingError> {
self.skip_ignored_tokens();
if self.remaining_text.is_empty() {
return Ok(None);
} else {
let start = self.current_index;
let (token_kind, index) = self.get_next_tokenkind().map_err(|e| {
let (line_above, line, line_below, contexted_start, line_number) = {
let line_number = self
.original_text
.chars()
.take(start)
.filter(|a| a == &'\n')
.count();
let lines: Vec<_> = self.original_text.lines().collect();
let line = (*lines
.get(line_number)
.expect("This should work, as have *at least* one (index = 0) line"))
.to_owned();
let contexted_start = {
let matched_line: Vec<_> = self.original_text.match_indices(&line).collect();
let (index, matched_line) = matched_line.get(0).expect("This first index should always match, as we took the line from the string in the first place");
debug_assert_eq!(matched_line, &&line);
start - index
};
let line_above;
if line_number == 0 {
// We only have one line, so no line above
line_above = "".to_owned();
} else {
line_above = (*lines
.get(line_number - 1)
.expect("We checked that this should work"))
.to_owned();
}
let line_below;
if lines.len() - 1 > line_number {
// We have a line after the current line
line_below = (*lines
.get(line_number + 1)
.expect("We checked that this should work"))
.to_owned();
} else {
line_below = "".to_owned();
}
(line_above, line, line_below, contexted_start, line_number)
};
SpannedLexingError::Error {
source: e,
start,
contexted_start,
line_above,
line_below,
line_number,
line,
}
})?;
self.chomp(index); // end - start
let end = self.current_index;
Ok(Some(Token {
span: TokenSpan { start, end },
kind: token_kind,
}))
}
}
fn get_next_tokenkind(&mut self) -> Result<(TokenKind, usize), LexingError> {
let next = match self.remaining_text.chars().next() {
Some(c) => c,
None => return Err(LexingError::UnexpectedEOF),
};
let (tok, length) = match next {
'(' => (TokenKind::ParenthesisOpen, 1),
')' => (TokenKind::ParenthesisClose, 1),
'{' => (TokenKind::BraceOpen, 1),
'}' => (TokenKind::BraceClose, 1),
':' => (TokenKind::Colon, 1),
';' => (TokenKind::Semicolon, 1),
',' => (TokenKind::Comma, 1),
'-' => tokenize_arrow(self.remaining_text)?,
c @ '_' | c if c.is_alphanumeric() => tokenize_ident(self.remaining_text)?,
other => return Err(LexingError::UnknownCharacter(other)),
};
Ok((tok, length))
}
/// Skip past any whitespace characters or comments.
fn skip_ignored_tokens(&mut self) {
loop {
let ws = self.skip_whitespace();
let comments = self.skip_comments();
if ws + comments == 0 {
return;
}
}
}
fn skip_whitespace(&mut self) -> usize {
let mut remaining = self.remaining_text;
// Filter out whitespace
let _ws = {
let ws = match take_while(remaining, |ch| ch.is_whitespace()) {
Ok((_, bytes_skipped)) => bytes_skipped,
_ => 0,
};
remaining = &remaining[ws..];
ws
};
// let comments = skip_comments(remaining);
// remaining = &remaining[comments..];
let skip = self.remaining_text.len() - remaining.len();
self.chomp(skip);
skip
}
fn skip_comments(&mut self) -> usize {
let remaining = self.remaining_text;
let pairs = [("//", "\n"), ("/*", "*/")];
let mut skip = 0;
for &(pattern, matcher) in &pairs {
if remaining.starts_with(pattern) {
let leftovers = skip_until(remaining, matcher);
skip = remaining.len() - leftovers.len();
break;
}
}
self.chomp(skip);
skip
}
fn chomp(&mut self, chars_to_chomp: usize) {
self.remaining_text = &self.remaining_text[chars_to_chomp..];
self.current_index += chars_to_chomp;
}
}
fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
let (got, chars_read) = take_while(text, |ch| ch == '_' || ch.is_alphanumeric())?;
// Filter out keywords
let tokenkind = match got {
"nasp" => TokenKind::Keyword(Keyword::nasp),
"fn" => TokenKind::Keyword(Keyword::r#fn),
other => TokenKind::Identifier(other.to_string()),
};
Ok((tokenkind, chars_read))
}
fn tokenize_arrow(text: &str) -> Result<(TokenKind, usize), LexingError> {
let mut chars = text.chars();
if let Some(char) = chars.next() {
if char == '-' {
if let Some(char) = chars.next() {
if char == '>' {
return Ok((TokenKind::Arrow, 2));
}
}
}
}
// This is an implicit else, as the other if clauses return
Err(LexingError::ExpectedArrow)
}
/// Consumes bytes while a predicate evaluates to true.
fn take_while<F>(data: &str, mut pred: F) -> Result<(&str, usize), LexingError>
where
F: FnMut(char) -> bool,
{
let mut current_index = 0;
for ch in data.chars() {
let should_continue = pred(ch);
if !should_continue {
break;
}
current_index += ch.len_utf8();
}
if current_index == 0 {
Err(LexingError::NoMatchesTaken)
} else {
Ok((&data[..current_index], current_index))
}
}
/// Skips input until the remaining string starts with the pattern, then skips past the pattern itself
fn skip_until<'a>(mut src: &'a str, pattern: &str) -> &'a str {
while !src.is_empty() && !src.starts_with(pattern) {
let next_char_size = src
.chars()
.next()
.expect("The string isn't empty")
.len_utf8();
src = &src[next_char_size..];
}
if src.is_empty() {
// The pattern was never found (e.g. a line comment without a trailing newline)
src
} else {
&src[pattern.len()..]
}
}
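To make the two free helper functions above more tangible, a crate-internal sketch (not part of this commit) of what they return:

fn _helper_examples() {
    // take_while reads the leading identifier characters and reports the bytes consumed
    let (ident, read) =
        take_while("hi(name: String)", |c| c == '_' || c.is_alphanumeric()).unwrap();
    assert_eq!((ident, read), ("hi", 2));

    // skip_until drops everything up to and including the pattern
    assert_eq!(skip_until("/* comment */ nasp", "*/"), " nasp");
}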

View File

@@ -0,0 +1,58 @@
use error::TrixyError;
use crate::lexing::TokenStream;
use self::command_spec::CommandSpec;
mod command_spec;
pub mod error;
pub mod lexing;
pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, TrixyError> {
let input_tokens = TokenStream::lex(input)?;
todo!()
}
#[cfg(test)]
mod test {
use crate::{
command_spec::{CommandSpec, Declaration, Genus, NamedType, Namespace, Type},
parse_trixy_lang,
};
#[test]
fn test_function_with_namespace() {
let expected = parse_trixy_lang(
"
nasp commands {
fn say_something(name_to_greet: String, what_to_say: String) -> String;
}
",
)
.unwrap();
let correct: CommandSpec = {
let declarations = vec![Declaration {
namespace: vec![Namespace {
name: "commands".to_owned(),
}],
genus: Genus::Function {
name: "say_something".to_owned(),
inputs: vec![
NamedType {
name: "name_to_greet".to_owned(),
base: Type::String,
},
NamedType {
name: "what_to_say".to_owned(),
base: Type::String,
},
],
output: Type::String,
},
}];
CommandSpec { declarations }
};
assert_eq!(expected, correct);
}
}

View File

@@ -0,0 +1,45 @@
use std::{fs, process::exit};
use trixy_lang_parser::lexing::TokenStream;
use std::path::PathBuf;
use clap::{Parser, Subcommand};
/// A helper command for the trixy-lang_parser crate
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
pub struct Args {
#[command(subcommand)]
/// The subcommand to execute
pub subcommand: Command,
}
#[derive(Subcommand, Debug)]
pub enum Command {
#[clap(value_parser)]
/// Only try to tokenize the file
Tokenize {
#[clap(value_parser)]
/// The file containing the trixy code to tokenize
file: PathBuf,
},
}
pub fn main() {
let args = Args::parse();
match args.subcommand {
Command::Tokenize { file } => {
let input = fs::read_to_string(file).unwrap();
let input_tokens = match TokenStream::lex(&input) {
Ok(tokens) => tokens,
Err(err) => {
println!("{}", err);
exit(1);
}
};
println!("{:#?}", input_tokens);
}
}
}
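Assuming the helper is launched through cargo, an invocation along the lines of cargo run -- tokenize path/to/file.tri (the path being a placeholder) should either pretty-print the resulting TokenStream or print the spanned error report and exit with status 1.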