forked from trinitrix/core
1
0
Fork 0

feat(trinitry): Initialize the parsing crate

This commit is contained in:
Benedikt Peetz 2023-11-01 13:15:27 +01:00
parent fcdfb4488b
commit 2c51bf073e
Signed by: bpeetz
GPG Key ID: A5E94010C3A642AD
5 changed files with 283 additions and 0 deletions

6
trinitry/.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
# build
/target
/result
# It is a library
Cargo.lock

15
trinitry/Cargo.toml Normal file
View File

@ -0,0 +1,15 @@
[package]
name = "trinitry"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
pest = "2.7.5"
pest_derive = {version = "2.7.5", features = ["grammar-extras"]}
thiserror = "1.0.50"
# The header imports katex, a js latex parser, into the doc comments
[package.metadata.docs.rs]
rustdoc-args = [ "--html-in-header", "./docs-header.html" ]

45
trinitry/docs-header.html Normal file
View File

@ -0,0 +1,45 @@
<!doctype html>
<!-- KaTeX requires the use of the HTML5 doctype. Without it, KaTeX may not render properly -->
<html>
<head>
<link
rel="stylesheet"
href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css"
integrity="sha384-n8MVd4RsNIU0tAv4ct0nTaAbDJwPJzDEaqSD1odI+WdtXRGWt2kTvGFasHpSy3SV"
crossorigin="anonymous"
/>
<!-- The loading of KaTeX is deferred to speed up page rendering -->
<script
defer
src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"
integrity="sha384-XjKyOOlGwcjNTAIQHIpgOno0Hl1YQqzUOEleOLALmuqehneUG+vnGctmUb0ZY0l8"
crossorigin="anonymous"
></script>
<!-- To automatically render math in text elements, include the auto-render extension: -->
<script
defer
src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/contrib/auto-render.min.js"
integrity="sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05"
crossorigin="anonymous"
></script>
<script>
document.addEventListener("DOMContentLoaded", function () {
renderMathInElement(document.body, {
delimiters: [
{ left: "$$", right: "$$", display: true },
{ left: "\\(", right: "\\)", display: false },
{ left: "$", right: "$", display: false },
{ left: "\\[", right: "\\]", display: true },
]
// FIXME(@soispha): This removes the quotes completely <2023-10-31>
// macros: {
// "”": "\\noexpand ”",
// "“": "\\noexpand “",
// },
});
});
</script>
</head>
</html>

192
trinitry/src/lib.rs Normal file
View File

@ -0,0 +1,192 @@
//! This crate is a parser for the 'Trinitry' (not 'Trinity') language, which is used to map all
//! sorts of functions to memorable commands.
//!
//! This parser is more of a validator, as Trinitry does not support any language features besides
//! commands and their arguments. Even simple constructs like '||' (OR) or '&&' (AND) are not
//! supported. If you need these features, simply write them in the language you've written your
//! function in.
//!
//! # General specification
//! ## Command
//! Basically every command can be a series of alphanumeric ASCII values.
//!
//! Strictly speaking, the language containing all valid command names is just the Kleene plus
//! (the Kleene closure without the empty word) over an alphabet $\Sigma_{cmd}$, which contains
//! all alphanumeric characters and a few symbols:
//! $$ \Sigma_{cmd} = \\{x | 0 \leqslant x \leqslant 9\\} \cup \\{x | "a" \leqslant x \leqslant "z"\\} \cup \\{x | "A" \leqslant x \leqslant "Z"\\} \cup \\{"\\_", "\text{-}", "."\\} $$
//!
//! ## Argument
//! Arguments are similar to commands, although they may contain additional characters (here
//! notated as "$\\dots{}$") and, if the argument is quoted, spaces and quotes as well:
//! $$ \Sigma_{args-quoted} = \Sigma_{cmd} \cup \\{"\\text{"}", "\\ ", \\dots{}\\} $$
//! $$ \Sigma_{args-single-quoted} = \Sigma_{cmd} \cup \\{"'", "\\ ", \\dots{}\\} $$
//! $$ \Sigma_{args} = \Sigma_{cmd} \cup \\{\\dots{}\\} $$
//! Look at the [trinitry.pest](../../../src/trinitry.pest) file for a full list of the additional
//! allowed characters.
//!
//! # Examples
//! ## Command
//! A valid command would be something like this:
//! ```text
//! quit
//! ```
//! Something like this would not be valid, however, as Trinitry does not support these 'complex'
//! language features:
//! ```text
//! write && quit
//! ```
//! ## Arguments
//! A valid command with arguments would be:
//! ```text
//! lua "function() print('Hi!') end"
//! ```
//! Whilst this would not be valid (that is, it would very likely not be what you want):
//! ```text
//! lua "function() print("Hi!") end"
//! ```
//! as the double quotes in the print statement actually unquote the argument, leaving you with
//! three arguments:
//! 1. `function() print(`
//! 1. `Hi!`
//! 1. `) end`
use std::fmt::Display;
use pest::{error::Error, Parser};
use pest_derive::Parser;
/// A parsed Trinitry command line: one command name plus its arguments.
///
/// The pest parser for the grammar in `trinitry.pest` is derived on this type; use
/// `Trinitry::new` to parse an input string into a value.
#[derive(Parser)]
#[grammar = "trinitry.pest"]
pub struct Trinitry {
/// The command name, taken from the parse node tagged `command`.
command: String,
/// The arguments in input order, taken from the parse nodes tagged `argument`, with
/// surrounding whitespace and enclosing quotes removed.
arguments: Vec<String>,
}
impl Trinitry {
    /// Parses `input` as a single Trinitry command with optional arguments.
    ///
    /// The parse node tagged `command` becomes the command name and every node tagged
    /// `argument` becomes one entry of the argument list, in input order. Surrounding whitespace
    /// is trimmed from each argument and one matching pair of enclosing quotes (`"…"` or `'…'`)
    /// is removed; quotes inside the argument are kept.
    ///
    /// # Errors
    /// Returns the pest [`Error`] when `input` does not match the `trinitry` grammar rule.
    ///
    /// # Panics
    /// Panics if the parse tree contains no node tagged `command`. The grammar is expected to
    /// make this impossible.
    pub fn new(input: &str) -> Result<Self, Error<Rule>> {
        let parsed = Self::parse(Rule::trinitry, input)?;

        // `find_tagged` consumes the pairs, so the command lookup runs on a clone and leaves
        // `parsed` itself available for the argument lookup below.
        let command = {
            let mut commands = parsed.clone().find_tagged("command");
            let command = commands
                .next()
                .expect("This should contain exactly one element");
            // Ensure that we have only one command
            // This should be ensured by the grammar, thus the 'debug_assert'
            debug_assert!(commands.next().is_none());
            command
        };

        let arguments: Vec<String> = parsed
            .find_tagged("argument")
            .map(|argument| {
                // The `argument` rule's span includes the separating whitespace in front of the
                // actual value, so trim it before looking at the quotes.
                Self::strip_matching_quotes(argument.as_str().trim()).to_owned()
            })
            .collect();

        Ok(Trinitry {
            command: command.as_str().to_owned(),
            arguments,
        })
    }

    /// Removes one layer of enclosing quotes from `arg`, if there is one.
    ///
    /// Quotes are only removed as a matching pair: `"a 'b'"` becomes `a 'b'` and `'a'` becomes
    /// `a`, while an unbalanced quote (`a"`, or a lone `"`) is returned unchanged. Stripping the
    /// double and single quotes independently of each other would also eat the inner quotes of
    /// an argument like `"'file'"`, which belong to the argument's value.
    fn strip_matching_quotes(arg: &str) -> &str {
        arg.strip_prefix('"')
            .and_then(|rest| rest.strip_suffix('"'))
            .or_else(|| {
                arg.strip_prefix('\'')
                    .and_then(|rest| rest.strip_suffix('\''))
            })
            .unwrap_or(arg)
    }
}
impl Display for Trinitry {
    /// Writes the command followed by each argument, every argument preceded by a single space.
    ///
    /// Arguments are written verbatim, without re-adding quotes, so an argument that contains
    /// spaces is not distinguishable from several arguments in the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.command)?;
        for argument in &self.arguments {
            write!(f, " {}", argument)?;
        }
        Ok(())
    }
}
mod tests;
#[cfg(test)]
mod test {
    use crate::Trinitry;

    /// Parses `input`, failing the test with the rendered parser error if it is rejected.
    fn parse(input: &str) -> Trinitry {
        match Trinitry::new(input) {
            Ok(parsed) => parsed,
            Err(error) => panic!("{}", error),
        }
    }

    /// A bare command has a name and no arguments.
    #[test]
    fn parse_cmd() {
        let parsed = parse("quit");

        assert_eq!(parsed.command, "quit");
        assert!(parsed.arguments.is_empty());
    }

    /// Quotes inside an unquoted argument are part of the argument.
    #[test]
    fn parse_arg_clean() {
        let parsed = parse(r#"lua print("Hi")"#);

        assert_eq!(parsed.command, "lua");
        assert_eq!(parsed.arguments[0], r#"print("Hi")"#);
    }

    /// A double-quoted argument may contain spaces and single quotes.
    #[test]
    fn parse_arg_quote() {
        let parsed = parse(r#"write "some 'file' name""#);

        assert_eq!(parsed.command, "write");
        assert_eq!(parsed.arguments[0], "some 'file' name");
    }

    /// A single-quoted argument may contain spaces and double quotes.
    #[test]
    fn parse_arg_single_quote() {
        let parsed = parse(r#"write 'some "file" name'"#);

        assert_eq!(parsed.command, "write");
        assert_eq!(parsed.arguments[0], "some \"file\" name");
    }

    /// Differently quoted and unquoted arguments can be mixed and keep their order.
    #[test]
    fn parse_arg_multi() {
        let parsed = parse(r#"write 'some "file" name' "other name" last"#);

        assert_eq!(parsed.command, "write");
        assert_eq!(
            parsed.arguments,
            ["some \"file\" name", "other name", "last"]
        );
    }
}

View File

@ -0,0 +1,25 @@
// Characters allowed in a command name: ASCII letters and digits plus "_", "-" and ".".
chars = { ASCII_ALPHANUMERIC | "_" | "-" | "." }
// Characters allowed in every kind of argument: everything in `chars` plus the punctuation
// listed below. Spaces and quotes are added by the individual argument rules.
// TODO(@soispha): Are these all the valid characters? <2023-11-01>
argument_chars = { chars | "(" | ")" | "{" | "}" | "<" | ">" | "?" | "!" | "+" | "^" | "@"
| "&" | "*" | "~" | "|" | "=" | "," | "\\" | "/" }
// Silent rules (`_{ … }`) match input without producing a pair of their own.
whitespace = _{ " " } // lower case to avoid special treatment of 'WHITESPACE'
quote = _{ "\"" }
// Short alias for `quote`.
q = _{ quote }
single_quote = _{ "'" }
// Short alias for `single_quote`.
sq = _{ single_quote }
// A command name: one or more `chars`.
command = { chars+ }
// A double-quoted argument: at least one argument character, space or single quote between two
// double quotes. The matched text includes the enclosing quotes.
arg_quoted = { q ~ (!q ~ (argument_chars | " " | "'" ))+ ~ q }
// A single-quoted argument: at least one argument character, space or double quote between two
// single quotes. The matched text includes the enclosing quotes.
arg_single_quoted = { sq ~ (!sq ~ (argument_chars | " " | "\"" ))+ ~ sq }
// An unquoted argument: argument characters and quotes, but no spaces.
arg = { (argument_chars | "\"" | "'")+ }
// One argument: at least one separating space, then the first alternative that matches, tried in
// the order quoted, single-quoted, unquoted. The matched text includes the leading spaces.
argument = { whitespace+ ~ (arg_quoted | arg_single_quoted | arg )}
// The whole input: exactly one command followed by any number of arguments. The `#command` and
// `#argument` tags are what `Trinitry::new` looks up with `find_tagged`.
trinitry = { SOI ~ #command = command ~ (#argument = argument)* ~ EOI }