feat(trinitry): Initialize the parsing crate
This commit is contained in:
parent
fcdfb4488b
commit
2c51bf073e
|
@ -0,0 +1,6 @@
|
||||||
|
# build
|
||||||
|
/target
|
||||||
|
/result
|
||||||
|
|
||||||
|
# It is a library
|
||||||
|
Cargo.lock
|
|
@ -0,0 +1,15 @@
|
||||||
|
[package]
|
||||||
|
name = "trinitry"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
pest = "2.7.5"
|
||||||
|
pest_derive = {version = "2.7.5", features = ["grammar-extras"]}
|
||||||
|
thiserror = "1.0.50"
|
||||||
|
|
||||||
|
# The header imports katex, a js latex parser, into the doc comments
|
||||||
|
[package.metadata.docs.rs]
|
||||||
|
rustdoc-args = [ "--html-in-header", "./docs-header.html" ]
|
|
@ -0,0 +1,45 @@
|
||||||
|
<!doctype html>
|
||||||
|
<!-- KaTeX requires the use of the HTML5 doctype. Without it, KaTeX may not render properly -->
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<link
|
||||||
|
rel="stylesheet"
|
||||||
|
href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css"
|
||||||
|
integrity="sha384-n8MVd4RsNIU0tAv4ct0nTaAbDJwPJzDEaqSD1odI+WdtXRGWt2kTvGFasHpSy3SV"
|
||||||
|
crossorigin="anonymous"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<!-- The loading of KaTeX is deferred to speed up page rendering -->
|
||||||
|
<script
|
||||||
|
defer
|
||||||
|
src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"
|
||||||
|
integrity="sha384-XjKyOOlGwcjNTAIQHIpgOno0Hl1YQqzUOEleOLALmuqehneUG+vnGctmUb0ZY0l8"
|
||||||
|
crossorigin="anonymous"
|
||||||
|
></script>
|
||||||
|
|
||||||
|
<!-- To automatically render math in text elements, include the auto-render extension: -->
|
||||||
|
<script
|
||||||
|
defer
|
||||||
|
src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/contrib/auto-render.min.js"
|
||||||
|
integrity="sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05"
|
||||||
|
crossorigin="anonymous"
|
||||||
|
></script>
|
||||||
|
<script>
|
||||||
|
document.addEventListener("DOMContentLoaded", function () {
|
||||||
|
renderMathInElement(document.body, {
|
||||||
|
delimiters: [
|
||||||
|
{ left: "$$", right: "$$", display: true },
|
||||||
|
{ left: "\\(", right: "\\)", display: false },
|
||||||
|
{ left: "$", right: "$", display: false },
|
||||||
|
{ left: "\\[", right: "\\]", display: true },
|
||||||
|
]
|
||||||
|
// FIXME(@soispha): This removes the quotes completely <2023-10-31>
|
||||||
|
// macros: {
|
||||||
|
// "”": "\\noexpand ”",
|
||||||
|
// "“": "\\noexpand “",
|
||||||
|
// },
|
||||||
|
});
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
</head>
|
||||||
|
</html>
|
|
@ -0,0 +1,192 @@
|
||||||
|
//! This crate is a parser for the 'Trinitry' (not 'Trinity') language, used to map all sorts of
|
||||||
|
//! Functions to a memorable command.
|
||||||
|
//!
|
||||||
|
//! This parser is more of a validator, as Trinitry does not support any language features besides
|
||||||
|
//! the aforementioned commands and arguments. That includes some simple constructs like: '||' (OR)
|
||||||
|
//! or '&&' (AND). If you need these features, simply write them in the language you've written your
|
||||||
|
//! Function in.
|
||||||
|
//!
|
||||||
|
//! # General specification
|
||||||
|
//! ## Command
|
||||||
|
//! Basically every command can be a series of alphanumeric ASCII values.
|
||||||
|
//!
|
||||||
|
//! Correctly spoken, the Language, containing all valid command names, is just the Kleene closure
|
||||||
|
//! over an Alphabet $\Sigma$, which contains all alphanumeric characters:
|
||||||
|
//! $$ \Sigma_{cmd} = \\{x | 0 \leqslant x \leqslant 9\\} \cup \\{x | "a" \leqslant x \leqslant "z"\\} \cup \\{x | "A" \leqslant x \leqslant "Z"\\} \cup \\{"\\_", "\text{-}", "."\\} $$
|
||||||
|
//!
|
||||||
|
//! ## Argument
|
||||||
|
//! Arguments are similar to the command, although they can also contain spaces and quotes,
|
||||||
|
//! if they are quoted, and additional characters (here notated as "$\\dots{}$"):
|
||||||
|
//! $$ \Sigma_{args-quoted} = \Sigma_{cmd} \cup \\{"\\text{"}", "\\ ", \\dots{}\\} $$
|
||||||
|
//! $$ \Sigma_{args-single-quoted} = \Sigma_{cmd} \cup \\{"'", "\\ ", \\dots{}\\} $$
|
||||||
|
//! $$ \Sigma_{args} = \Sigma_{cmd} \cup \\{\\dots{}\\} $$
|
||||||
|
//! Look at the [trinitry.pest](../../../src/trinitry.pest) file for a full list of the additional
|
||||||
|
//! allowed characters.
|
||||||
|
//!
|
||||||
|
//! # Examples
|
||||||
|
//! ## Command
|
||||||
|
//! A valid command would be something like this:
|
||||||
|
//! ```text
|
||||||
|
//! quit
|
||||||
|
//! ```
|
||||||
|
//! Something like this would not be valid, however, as Trinitry does not support these 'complex'
|
||||||
|
//! language features:
|
||||||
|
//! ```text
|
||||||
|
//! write && quit
|
||||||
|
//! ```
|
||||||
|
//! ## Arguments
|
||||||
|
//! A valid argumented command would be:
|
||||||
|
//! ```text
|
||||||
|
//! lua "function() print('Hi!') end"
|
||||||
|
//! ```
|
||||||
|
//! Whilst this would not be valid (that is, it would very likely not be what you want):
|
||||||
|
//! ```text
|
||||||
|
//! lua "function() print("Hi!") end"
|
||||||
|
//! ```
|
||||||
|
//! as the double quotes in the print statement actually unquote the argument, leaving you with
|
||||||
|
//! three arguments:
|
||||||
|
//! 1. `function() print(`
|
||||||
|
//! 1. `Hi!`
|
||||||
|
//! 1. `) end`
|
||||||
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
use pest::{error::Error, Parser};
|
||||||
|
use pest_derive::Parser;
|
||||||
|
|
||||||
|
#[derive(Parser)]
|
||||||
|
#[grammar = "trinitry.pest"]
|
||||||
|
pub struct Trinitry {
|
||||||
|
command: String,
|
||||||
|
arguments: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Trinitry {
|
||||||
|
pub fn new(input: &str) -> Result<Self, Error<Rule>> {
|
||||||
|
let parsed = Self::parse(Rule::trinitry, input)?;
|
||||||
|
|
||||||
|
let command = {
|
||||||
|
let command: Vec<_> = parsed.clone().find_tagged("command").collect();
|
||||||
|
|
||||||
|
// Ensure that we have only one command
|
||||||
|
// This should be ensured by the grammar, thus the 'debug_assert'
|
||||||
|
debug_assert_eq!(command.len(), 1);
|
||||||
|
|
||||||
|
// PERFORMANCE(@soispha): Replace this with `mem::take` (when pairs implements Default)
|
||||||
|
// <2023-11-01>
|
||||||
|
command
|
||||||
|
.first()
|
||||||
|
.expect("This should contain exactly one element")
|
||||||
|
.to_owned()
|
||||||
|
};
|
||||||
|
let arguments: Vec<_> = parsed.clone().find_tagged("argument").collect();
|
||||||
|
|
||||||
|
Ok(Trinitry {
|
||||||
|
command: command.as_str().to_owned(),
|
||||||
|
arguments: arguments
|
||||||
|
.iter()
|
||||||
|
.map(|arg| {
|
||||||
|
let mut arg = arg.as_str().trim();
|
||||||
|
arg = if let Some(new_arg) = arg.strip_prefix("\"") {
|
||||||
|
new_arg
|
||||||
|
} else {
|
||||||
|
arg
|
||||||
|
};
|
||||||
|
arg = if let Some(new_arg) = arg.strip_suffix("\"") {
|
||||||
|
new_arg
|
||||||
|
} else {
|
||||||
|
arg
|
||||||
|
};
|
||||||
|
|
||||||
|
arg = if let Some(new_arg) = arg.strip_prefix("'") {
|
||||||
|
new_arg
|
||||||
|
} else {
|
||||||
|
arg
|
||||||
|
};
|
||||||
|
arg = if let Some(new_arg) = arg.strip_suffix("'") {
|
||||||
|
new_arg
|
||||||
|
} else {
|
||||||
|
arg
|
||||||
|
};
|
||||||
|
arg.to_owned()
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for Trinitry {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
if self.arguments.is_empty() {
|
||||||
|
f.write_str(&self.command)
|
||||||
|
} else {
|
||||||
|
f.write_fmt(format_args!(
|
||||||
|
"{} {}",
|
||||||
|
&self.command,
|
||||||
|
&self.arguments.join(" ")
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE(review): this declares an out-of-line `tests` module (`tests.rs` or `tests/mod.rs`) that is
// not gated by `#[cfg(test)]` and sits next to the inline `mod test` below — confirm that the
// file exists and whether it should only be compiled for tests.
mod tests;
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use crate::Trinitry;

    /// Parses `input`, failing the current test with the parser's error message on rejection.
    fn parse_or_panic(input: &str) -> Trinitry {
        match Trinitry::new(input) {
            Ok(parsed) => parsed,
            Err(error) => panic!("{}", error),
        }
    }

    /// A bare command has no arguments.
    #[test]
    fn parse_cmd() {
        let parsed = parse_or_panic("quit");

        assert_eq!(parsed.command, "quit");
        assert!(parsed.arguments.is_empty());
    }

    /// An unquoted argument keeps the quotes it contains.
    #[test]
    fn parse_arg_clean() {
        let parsed = parse_or_panic(r##"lua print("Hi")"##);

        assert_eq!(parsed.command, "lua");
        assert_eq!(parsed.arguments[0], r#"print("Hi")"#);
    }

    /// A double-quoted argument may contain spaces and single quotes.
    #[test]
    fn parse_arg_quote() {
        let parsed = parse_or_panic(r##"write "some 'file' name""##);

        assert_eq!(parsed.command, "write");
        assert_eq!(parsed.arguments[0], "some 'file' name");
    }

    /// A single-quoted argument may contain spaces and double quotes.
    #[test]
    fn parse_arg_single_quote() {
        let parsed = parse_or_panic(r##"write 'some "file" name'"##);

        assert_eq!(parsed.command, "write");
        assert_eq!(parsed.arguments[0], "some \"file\" name");
    }

    /// Differently quoted and unquoted arguments can be mixed; their order is preserved.
    #[test]
    fn parse_arg_multi() {
        let parsed = parse_or_panic(r##"write 'some "file" name' "other name" last"##);

        assert_eq!(parsed.command, "write");
        assert_eq!(
            parsed.arguments,
            ["some \"file\" name", "other name", "last"]
        );
    }
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
chars = { ASCII_ALPHANUMERIC | "_" | "-" | "." }
|
||||||
|
|
||||||
|
// TODO(@soispha): Are these all the valid characters? <2023-11-01>
|
||||||
|
argument_chars = { chars | "(" | ")" | "{" | "}" | "<" | ">" | "?" | "!" | "+" | "^" | "@"
|
||||||
|
| "&" | "*" | "~" | "|" | "=" | "," | "\\" | "/" }
|
||||||
|
whitespace = _{ " " } // lower case to avoid special treatment of 'WHITESPACE'
|
||||||
|
|
||||||
|
quote = _{ "\"" }
|
||||||
|
q = _{ quote }
|
||||||
|
|
||||||
|
single_quote = _{ "'" }
|
||||||
|
sq = _{ single_quote }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
command = { chars+ }
|
||||||
|
|
||||||
|
// A double-quoted argument: at least one argument character, space or single quote between two
// double quotes. The matched text includes the surrounding double quotes.
arg_quoted = { q ~ (!q ~ (argument_chars | " " | "'" ))+ ~ q }
|
||||||
|
// A single-quoted argument: at least one argument character, space or double quote between two
// single quotes. The matched text includes the surrounding single quotes.
arg_single_quoted = { sq ~ (!sq ~ (argument_chars | " " | "\"" ))+ ~ sq }
|
||||||
|
// An unquoted argument: argument characters and quote characters, but no spaces.
arg = { (argument_chars | "\"" | "'")+ }
|
||||||
|
|
||||||
|
// One argument, including the whitespace that separates it from what came before.
// The alternatives are tried in order: double-quoted, single-quoted, then unquoted.
argument = { whitespace+ ~ (arg_quoted | arg_single_quoted | arg )}
|
||||||
|
|
||||||
|
|
||||||
|
// Entry rule: the whole input must be exactly one command followed by any number of arguments.
// The `#command` and `#argument` tags are what the Rust side looks up via `find_tagged`.
trinitry = { SOI ~ #command = command ~ (#argument = argument)* ~ EOI }
|
Reference in New Issue