feat(trinitry): Initialize the parsing crate
This commit is contained in:
parent
196c392f59
commit
091d77089b
|
@ -0,0 +1,6 @@
|
|||
# build
|
||||
/target
|
||||
/result
|
||||
|
||||
# This crate is a library, so its lockfile is not committed
|
||||
Cargo.lock
|
|
@ -0,0 +1,15 @@
|
|||
[package]
|
||||
name = "trinitry"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
pest = "2.7.5"
|
||||
pest_derive = {version = "2.7.5", features = ["grammar-extras"]}
|
||||
thiserror = "1.0.50"
|
||||
|
||||
# The header imports katex, a js latex parser, into the doc comments
|
||||
[package.metadata.docs.rs]
|
||||
rustdoc-args = [ "--html-in-header", "./docs-header.html" ]
|
|
@ -0,0 +1,45 @@
|
|||
<!doctype html>
|
||||
<!-- KaTeX requires the use of the HTML5 doctype. Without it, KaTeX may not render properly -->
|
||||
<html>
|
||||
<head>
|
||||
<link
|
||||
rel="stylesheet"
|
||||
href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css"
|
||||
integrity="sha384-n8MVd4RsNIU0tAv4ct0nTaAbDJwPJzDEaqSD1odI+WdtXRGWt2kTvGFasHpSy3SV"
|
||||
crossorigin="anonymous"
|
||||
/>
|
||||
|
||||
<!-- The loading of KaTeX is deferred to speed up page rendering -->
|
||||
<script
|
||||
defer
|
||||
src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"
|
||||
integrity="sha384-XjKyOOlGwcjNTAIQHIpgOno0Hl1YQqzUOEleOLALmuqehneUG+vnGctmUb0ZY0l8"
|
||||
crossorigin="anonymous"
|
||||
></script>
|
||||
|
||||
<!-- To automatically render math in text elements, include the auto-render extension: -->
|
||||
<script
|
||||
defer
|
||||
src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/contrib/auto-render.min.js"
|
||||
integrity="sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05"
|
||||
crossorigin="anonymous"
|
||||
></script>
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", function () {
  // Delimiter pairs KaTeX's auto-render extension should look for.
  // "$$" is listed before "$" so display math is tried before inline math.
  var mathDelimiters = [
    { left: "$$", right: "$$", display: true },
    { left: "\\(", right: "\\)", display: false },
    { left: "$", right: "$", display: false },
    { left: "\\[", right: "\\]", display: true },
  ];

  // FIXME(@soispha): This removes the quotes completely <2023-10-31>
  // macros: {
  //   "”": "\\noexpand ”",
  //   "“": "\\noexpand “",
  // },
  var renderOptions = { delimiters: mathDelimiters };

  // Render every math expression found in the page body once the DOM is ready.
  renderMathInElement(document.body, renderOptions);
});
|
||||
</script>
|
||||
</head>
|
||||
</html>
|
|
@ -0,0 +1,192 @@
|
|||
//! This crate is a parser for the 'Trinitry' (not 'Trinity') language, used to map all sorts of
|
||||
//! Functions to a memorable command.
|
||||
//!
|
||||
//! This parser is more of a validator, as Trinitry does not support any language features besides
|
||||
//! the aforementioned commands and arguments. Even simple constructs like '||' (OR) or '&&' (AND)
//! are not supported. If you need these features, simply write them in the language you've written your
|
||||
//! Function in.
|
||||
//!
|
||||
//! # General specification
|
||||
//! ## Command
|
||||
//! Basically every command can be a series of alphanumeric ASCII values.
|
||||
//!
|
||||
//! Strictly speaking, the language containing all valid command names is just the Kleene plus
//! (the Kleene closure without the empty word) over an alphabet $\Sigma$, which contains all
//! alphanumeric characters and a few punctuation characters:
|
||||
//! $$ \Sigma_{cmd} = \\{x | 0 \leqslant x \leqslant 9\\} \cup \\{x | "a" \leqslant x \leqslant "z"\\} \cup \\{x | "A" \leqslant x \leqslant "Z"\\} \cup \\{"\\_", "\text{-}", "."\\} $$
|
||||
//!
|
||||
//! ## Argument
|
||||
//! Arguments are similar to the command, although they can also contain spaces and quotes,
|
||||
//! if they are quoted, and additional characters (here notated as "$\\dots{}$"):
|
||||
//! $$ \Sigma_{args-quoted} = \Sigma_{cmd} \cup \\{"\\text{"}", "\\ ", \\dots{}\\} $$
|
||||
//! $$ \Sigma_{args-single-quoted} = \Sigma_{cmd} \cup \\{"'", "\\ ", \\dots{}\\} $$
|
||||
//! $$ \Sigma_{args} = \Sigma_{cmd} \cup \\{\\dots{}\\} $$
|
||||
//! Look at the [trinitry.pest](../../../src/trinitry.pest) file for a full list of the additional
|
||||
//! allowed characters.
|
||||
//!
|
||||
//! # Examples
|
||||
//! ## Command
|
||||
//! A valid command would be something like this:
|
||||
//! ```text
|
||||
//! quit
|
||||
//! ```
|
||||
//! Something like this would not be valid, however, as Trinitry does not support these 'complex'
|
||||
//! language features:
|
||||
//! ```text
|
||||
//! write && quit
|
||||
//! ```
|
||||
//! ## Arguments
|
||||
//! A valid command with arguments would be:
|
||||
//! ```text
|
||||
//! lua "function() print('Hi!') end"
|
||||
//! ```
|
||||
//! Whilst this would not be valid (that is, it would very likely not be what you want):
|
||||
//! ```text
|
||||
//! lua "function() print("Hi!") end"
|
||||
//! ```
|
||||
//! as the double quotes in the print statement actually unquote the argument, leaving you with
|
||||
//! three arguments:
|
||||
//! 1. `function() print(`
|
||||
//! 1. `Hi!`
|
||||
//! 1. `) end`
|
||||
use std::fmt::Display;
|
||||
|
||||
use pest::{error::Error, Parser};
|
||||
use pest_derive::Parser;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[grammar = "trinitry.pest"]
|
||||
pub struct Trinitry {
|
||||
command: String,
|
||||
arguments: Vec<String>,
|
||||
}
|
||||
|
||||
impl Trinitry {
|
||||
pub fn new(input: &str) -> Result<Self, Error<Rule>> {
|
||||
let parsed = Self::parse(Rule::trinitry, input)?;
|
||||
|
||||
let command = {
|
||||
let command: Vec<_> = parsed.clone().find_tagged("command").collect();
|
||||
|
||||
// Ensure that we have only one command
|
||||
// This should be ensured by the grammar, thus the 'debug_assert'
|
||||
debug_assert_eq!(command.len(), 1);
|
||||
|
||||
// PERFORMANCE(@soispha): Replace this with `mem::take` (when pairs implements Default)
|
||||
// <2023-11-01>
|
||||
command
|
||||
.first()
|
||||
.expect("This should contain exactly one element")
|
||||
.to_owned()
|
||||
};
|
||||
let arguments: Vec<_> = parsed.clone().find_tagged("argument").collect();
|
||||
|
||||
Ok(Trinitry {
|
||||
command: command.as_str().to_owned(),
|
||||
arguments: arguments
|
||||
.iter()
|
||||
.map(|arg| {
|
||||
let mut arg = arg.as_str().trim();
|
||||
arg = if let Some(new_arg) = arg.strip_prefix("\"") {
|
||||
new_arg
|
||||
} else {
|
||||
arg
|
||||
};
|
||||
arg = if let Some(new_arg) = arg.strip_suffix("\"") {
|
||||
new_arg
|
||||
} else {
|
||||
arg
|
||||
};
|
||||
|
||||
arg = if let Some(new_arg) = arg.strip_prefix("'") {
|
||||
new_arg
|
||||
} else {
|
||||
arg
|
||||
};
|
||||
arg = if let Some(new_arg) = arg.strip_suffix("'") {
|
||||
new_arg
|
||||
} else {
|
||||
arg
|
||||
};
|
||||
arg.to_owned()
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Trinitry {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if self.arguments.is_empty() {
|
||||
f.write_str(&self.command)
|
||||
} else {
|
||||
f.write_fmt(format_args!(
|
||||
"{} {}",
|
||||
&self.command,
|
||||
&self.arguments.join(" ")
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod tests;
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::Trinitry;

    /// Parses `input`, panicking with the parser's rendered error message on failure.
    fn parse(input: &str) -> Trinitry {
        match Trinitry::new(input) {
            Ok(parsed) => parsed,
            Err(err) => panic!("{}", err),
        }
    }

    /// A bare command has a name and no arguments.
    #[test]
    fn parse_cmd() {
        let parsed = parse("quit");

        assert_eq!(parsed.command, "quit");
        assert!(parsed.arguments.is_empty());
    }

    /// Quotes in the middle of an unquoted argument are kept as they are.
    #[test]
    fn parse_arg_clean() {
        let parsed = parse(r##"lua print("Hi")"##);

        assert_eq!(parsed.command, "lua");
        assert_eq!(parsed.arguments[0], r#"print("Hi")"#);
    }

    /// A double-quoted argument may contain spaces and single quotes.
    #[test]
    fn parse_arg_quote() {
        let parsed = parse(r##"write "some 'file' name""##);

        assert_eq!(parsed.command, "write");
        assert_eq!(parsed.arguments[0], "some 'file' name");
    }

    /// A single-quoted argument may contain spaces and double quotes.
    #[test]
    fn parse_arg_single_quote() {
        let parsed = parse(r##"write 'some "file" name'"##);

        assert_eq!(parsed.command, "write");
        assert_eq!(parsed.arguments[0], "some \"file\" name");
    }

    /// Single-quoted, double-quoted and unquoted arguments can be mixed in one command.
    #[test]
    fn parse_arg_multi() {
        let parsed = parse(r##"write 'some "file" name' "other name" last"##);

        let expected_args: Vec<String> = ["some \"file\" name", "other name", "last"]
            .iter()
            .map(|arg| arg.to_string())
            .collect();

        assert_eq!(parsed.command, "write");
        assert_eq!(parsed.arguments, expected_args);
    }
}
|
|
@ -0,0 +1,25 @@
|
|||
// A single character permitted in a command name: an ASCII letter or digit, "_", "-" or ".".
chars = { ASCII_ALPHANUMERIC | "_" | "-" | "." }
|
||||
|
||||
// TODO(@soispha): Are these all the valid characters? <2023-11-01>
|
||||
// A single character permitted in every kind of argument: everything in `chars` plus the
// punctuation listed here. Spaces and quote characters are not part of this set; the `arg*`
// rules add them where they are allowed.
argument_chars = { chars | "(" | ")" | "{" | "}" | "<" | ">" | "?" | "!" | "+" | "^" | "@"
|
||||
| "&" | "*" | "~" | "|" | "=" | "," | "\\" | "/" }
|
||||
// A single space character, used by `argument` as the separator in front of each argument.
// Declared silent (`_`), so it does not produce a pair of its own.
whitespace = _{ " " } // lower case to avoid special treatment of 'WHITESPACE'
|
||||
|
||||
// The double-quote character. Declared silent (`_`), so it does not produce a pair of its own.
quote = _{ "\"" }
|
||||
// Short alias for `quote`, used by `arg_quoted`.
q = _{ quote }
|
||||
|
||||
// The single-quote character. Declared silent (`_`), so it does not produce a pair of its own.
single_quote = _{ "'" }
|
||||
// Short alias for `single_quote`, used by `arg_single_quoted`.
sq = _{ single_quote }
|
||||
|
||||
|
||||
|
||||
// The command name: one or more `chars`. It is tagged `#command` inside the `trinitry` rule.
command = { chars+ }
|
||||
|
||||
// An argument enclosed in double quotes. Between the quotes there must be at least one
// character, each being an `argument_chars`, a space or a single quote; a double quote is
// never accepted as content, so the first one after the opening quote ends the argument.
arg_quoted = { q ~ (!q ~ (argument_chars | " " | "'" ))+ ~ q }
|
||||
// An argument enclosed in single quotes. Between the quotes there must be at least one
// character, each being an `argument_chars`, a space or a double quote; a single quote is
// never accepted as content, so the first one after the opening quote ends the argument.
arg_single_quoted = { sq ~ (!sq ~ (argument_chars | " " | "\"" ))+ ~ sq }
|
||||
// An unquoted argument: one or more `argument_chars` or quote characters. It cannot contain
// a space.
arg = { (argument_chars | "\"" | "'")+ }
|
||||
|
||||
// One argument together with its leading separator: at least one space, followed by the first
// alternative that matches, tried in the order double-quoted, single-quoted, unquoted.
argument = { whitespace+ ~ (arg_quoted | arg_single_quoted | arg )}
|
||||
|
||||
|
||||
// Entry rule: the complete input (SOI to EOI) must consist of one command followed by zero or
// more arguments. The `#command` and `#argument` tags label the matched pairs so that they can
// be looked up by tag name.
trinitry = { SOI ~ #command = command ~ (#argument = argument)* ~ EOI }
|
Reference in New Issue