From 2c51bf073e9e58ae550a5cf3618f852ef790f4a6 Mon Sep 17 00:00:00 2001 From: Soispha Date: Wed, 1 Nov 2023 13:15:27 +0100 Subject: [PATCH] feat(trinitry): Initialize the parsing crate --- trinitry/.gitignore | 6 ++ trinitry/Cargo.toml | 15 +++ trinitry/docs-header.html | 45 +++++++++ trinitry/src/lib.rs | 192 +++++++++++++++++++++++++++++++++++++ trinitry/src/trinitry.pest | 25 +++++ 5 files changed, 283 insertions(+) create mode 100644 trinitry/.gitignore create mode 100644 trinitry/Cargo.toml create mode 100644 trinitry/docs-header.html create mode 100644 trinitry/src/lib.rs create mode 100644 trinitry/src/trinitry.pest diff --git a/trinitry/.gitignore b/trinitry/.gitignore new file mode 100644 index 0000000..4424904 --- /dev/null +++ b/trinitry/.gitignore @@ -0,0 +1,6 @@ +# build +/target +/result + +# It is a library +Cargo.lock diff --git a/trinitry/Cargo.toml b/trinitry/Cargo.toml new file mode 100644 index 0000000..7856ba1 --- /dev/null +++ b/trinitry/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "trinitry" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +pest = "2.7.5" +pest_derive = {version = "2.7.5", features = ["grammar-extras"]} +thiserror = "1.0.50" + +# The header imports katex, a js latex parser, into the doc comments +[package.metadata.docs.rs] +rustdoc-args = [ "--html-in-header", "./docs-header.html" ] diff --git a/trinitry/docs-header.html b/trinitry/docs-header.html new file mode 100644 index 0000000..beafd4d --- /dev/null +++ b/trinitry/docs-header.html @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + diff --git a/trinitry/src/lib.rs b/trinitry/src/lib.rs new file mode 100644 index 0000000..2af4dd3 --- /dev/null +++ b/trinitry/src/lib.rs @@ -0,0 +1,192 @@ +//! This crate is a parser for the 'Trinitry' (not 'Trinity') language, used to map all sort of +//! Functions to a memorable command. +//! +//! 
This parser is more of a validator, as Trinitry does not support any language features besides
+//! the aforementioned commands and arguments. This lack of support extends even to simple
+//! constructs like '||' (OR) or '&&' (AND). If you need these features, simply write them in the
+//! language you've written your Function in.
+//!
+//! # General specification
+//! ## Command
+//! Basically every command can be a series of alphanumeric ASCII values.
+//!
+//! Strictly speaking, the language containing all valid command names is just the Kleene closure
+//! over an alphabet $\Sigma_{cmd}$, which contains all alphanumeric characters (plus "_", "-"
+//! and "."):
+//! $$ \Sigma_{cmd} = \\{x | 0 \leqslant x \leqslant 9\\} \cup \\{x | "a" \leqslant x \leqslant "z"\\} \cup \\{x | "A" \leqslant x \leqslant "Z"\\} \cup \\{"\\_", "\text{-}", "."\\} $$
+//!
+//! ## Argument
+//! Arguments are similar to the command, although they can also contain spaces and quotes
+//! (if the argument is quoted) as well as additional characters (here notated as "$\\dots{}$"):
+//! $$ \Sigma_{args-quoted} = \Sigma_{cmd} \cup \\{"\\text{"}", "\\ ", \\dots{}\\} $$
+//! $$ \Sigma_{args-single-quoted} = \Sigma_{cmd} \cup \\{"'", "\\ ", \\dots{}\\} $$
+//! $$ \Sigma_{args} = \Sigma_{cmd} \cup \\{\\dots{}\\} $$
+//! Look at the [trinitry.pest](../../../src/trinitry.pest) file for a full list of the additional
+//! allowed characters.
+//!
+//! # Examples
+//! ## Command
+//! A valid command would be something like this:
+//! ```text
+//! quit
+//! ```
+//! Something like this would not be valid, however, as Trinitry does not support these 'complex'
+//! language features:
+//! ```text
+//! write && quit
+//! ```
+//! ## Arguments
+//! A valid command with arguments would be:
+//! ```text
+//! lua "function() print('Hi!') end"
+//! ```
+//! Whilst this would not be valid (that is, it would very likely not be what you want):
+//! ```text
+//! lua "function() print("Hi!") end"
+//! ```
+//! 
as the double quotes in the print statement actually unquote the argument, splitting it into
+//! three pieces:
+//! 1. `function() print(`
+//! 1. `Hi!`
+//! 1. `) end`
+//!
+//! As the first two pieces are not separated by whitespace, the whole input is rejected.
+use std::fmt::Display;
+
+use pest::{error::Error, Parser};
+use pest_derive::Parser;
+
+/// A validated Trinitry command line: one command name followed by its arguments.
+///
+/// The struct doubles as the [`pest`] parser generated from `trinitry.pest`.
+#[derive(Debug, Clone, PartialEq, Eq, Parser)]
+#[grammar = "trinitry.pest"]
+pub struct Trinitry {
+    /// The command name, exactly as it was written in the input.
+    command: String,
+    /// The arguments in input order, with their surrounding quotes removed.
+    arguments: Vec<String>,
+}
+
+impl Trinitry {
+    /// Parses and validates `input` as a Trinitry command line.
+    ///
+    /// The pair of quotes surrounding a quoted argument is removed; everything in between
+    /// (including quotes of the other kind) is kept verbatim.
+    ///
+    /// # Errors
+    /// Returns the [`pest`] error if `input` does not match the grammar in `trinitry.pest`.
+    pub fn new(input: &str) -> Result<Self, Error<Rule>> {
+        let parsed = Self::parse(Rule::trinitry, input)?;
+
+        let command = {
+            let mut commands = parsed.clone().find_tagged("command");
+            let command = commands
+                .next()
+                .expect("The grammar requires exactly one command")
+                .as_str()
+                .to_owned();
+
+            // Ensure that we have only one command
+            // This should be ensured by the grammar, thus the 'debug_assert'
+            debug_assert!(commands.next().is_none());
+
+            command
+        };
+
+        let arguments = parsed
+            .find_tagged("argument")
+            .map(|argument| unquote(argument.as_str()).to_owned())
+            .collect();
+
+        Ok(Self { command, arguments })
+    }
+}
+
+/// Trims the whitespace around a raw argument and removes one pair of matching quotes.
+///
+/// A leading quote is only removed together with a closing quote of the same kind at the very
+/// end. Nested or unbalanced quotes are part of the argument and stay untouched, so
+/// `"'file'"` becomes `'file'` and `print("Hi")` is returned as is.
+fn unquote(raw: &str) -> &str {
+    // The span of an `argument` includes the whitespace separating it from its predecessor.
+    let trimmed = raw.trim();
+
+    ['"', '\'']
+        .into_iter()
+        .find_map(|quote| trimmed.strip_prefix(quote)?.strip_suffix(quote))
+        .unwrap_or(trimmed)
+}
+
+impl Display for Trinitry {
+    /// Writes the command followed by its arguments, separated by single spaces.
+    ///
+    /// The arguments are written without quotes, even if they contain spaces.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(&self.command)?;
+        for argument in &self.arguments {
+            write!(f, " {}", argument)?;
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::Trinitry;
+
+    /// Parses `input`, panicking with the formatted parser error on failure.
+    fn parse(input: &str) -> Trinitry {
+        Trinitry::new(input).unwrap_or_else(|e| {
+            panic!("{}", e);
+        })
+    }
+
+    #[test]
+    fn parse_cmd() {
+        let p = parse("quit");
+        assert_eq!(p.command, "quit");
+        assert!(p.arguments.is_empty());
+    }
+
+    #[test]
+    fn parse_arg_clean() {
+        let p = parse(r##"lua print("Hi")"##);
+        assert_eq!(p.command, "lua");
+        assert_eq!(p.arguments[0], r#"print("Hi")"#);
+    }
+
+    #[test]
+    fn parse_arg_quote() {
+        let p = parse(r##"write "some 'file' name""##);
+        assert_eq!(p.command, "write");
+        assert_eq!(p.arguments[0], "some 'file' name");
+    }
+
+    #[test]
+    fn parse_arg_single_quote() {
+        let p = parse(r##"write 'some "file" name'"##);
+        assert_eq!(p.command, "write");
+        assert_eq!(p.arguments[0], "some \"file\" name");
+    }
+
+    #[test]
+    fn parse_arg_multi() {
+        let p = parse(r##"write 'some "file" name' "other name" last"##);
+
+        let expected_args = ["some \"file\" name", "other name", "last"]
+            .iter()
+            .map(|str| (*str).to_owned())
+            .collect::<Vec<String>>();
+
+        assert_eq!(p.command, "write");
+        assert_eq!(p.arguments, expected_args);
+    }
+
+    #[test]
+    fn parse_arg_nested_quotes() {
+        // Only the outer pair of quotes belongs to the syntax, the inner one is content.
+        let p = parse(r##"write "'file'""##);
+        assert_eq!(p.command, "write");
+        assert_eq!(p.arguments[0], "'file'");
+    }
+
+    #[test]
+    fn reject_unquoting_quotes() {
+        assert!(Trinitry::new(r##"lua "function() print("Hi!") end""##).is_err());
+    }
+
+    #[test]
+    fn display_joins_with_spaces() {
+        let p = parse(r##"write 'some "file" name' last"##);
+        assert_eq!(p.to_string(), r##"write some "file" name last"##);
+    }
+}
diff --git a/trinitry/src/trinitry.pest b/trinitry/src/trinitry.pest
new file mode 100644
index 0000000..c868a6d
--- /dev/null
+++ b/trinitry/src/trinitry.pest
@@ -0,0 +1,25 @@
+chars = { ASCII_ALPHANUMERIC | "_" | "-" | "." }
+
+// TODO(@soispha): Are these all the valid characters? <2023-11-01>
+argument_chars = { chars | "(" | ")" | "{" | "}" | "<" | ">" | "?" | "!" | "+" | "^" | "@"
+                 | "&" | "*" | "~" | "|" | "=" | "," | "\\" | "/" }
+whitespace = _{ " " } // lower case to avoid special treatment of 'WHITESPACE'
+
+quote = _{ "\"" }
+q = _{ quote }
+
+single_quote = _{ "'" }
+sq = _{ single_quote }
+
+
+
+command = { chars+ }
+
+arg_quoted = { q ~ (!q ~ (argument_chars | " " | "'" ))+ ~ q }
+arg_single_quoted = { sq ~ (!sq ~ (argument_chars | " " | "\"" ))+ ~ sq }
+arg = { (argument_chars | "\"" | "'")+ }
+
+argument = { whitespace+ ~ (arg_quoted | arg_single_quoted | arg )}
+
+
+trinitry = { SOI ~ #command = command ~ (#argument = argument)* ~ EOI }