diff --git a/trixy-parser/src/bin/trixy-parser.rs b/trixy-parser/src/bin/trixy-parser.rs
index 9eb5e7e..2340ce7 100644
--- a/trixy-parser/src/bin/trixy-parser.rs
+++ b/trixy-parser/src/bin/trixy-parser.rs
@@ -33,8 +33,17 @@ pub struct Args {
     /// The subcommand to execute
     pub subcommand: Command,
 }
+
 #[derive(Subcommand, Debug)]
 pub enum Command {
+    #[clap(value_parser)]
+    /// Only replace the regex replacements in the file
+    Replace {
+        #[clap(value_parser)]
+        /// The file containing the trixy code to replace
+        file: PathBuf,
+    },
+
     #[clap(value_parser)]
     /// Only try to tokenize the file
     Tokenize {
@@ -125,7 +134,7 @@ pub fn main() {
             let processed = match parsed.process(input) {
                 Ok(ok) => ok,
                 Err(err) => {
-                    eprintln!("Error while doing the seconde (checked) parsing run:");
+                    eprintln!("Error while doing the second (checked) parsing run:");
                     eprintln!("{}", err);
                     exit(1)
                 }
@@ -140,5 +149,11 @@ pub fn main() {
             });
             println!("{:#?}", parsed);
         }
+
+        Command::Replace { file } => {
+            let input = fs::read_to_string(file).unwrap();
+            let parsed = TokenStream::replace(&input);
+            println!("{}", parsed);
+        }
     }
 }
diff --git a/trixy-parser/src/lexing/mod.rs b/trixy-parser/src/lexing/mod.rs
index b0a69e1..381e070 100644
--- a/trixy-parser/src/lexing/mod.rs
+++ b/trixy-parser/src/lexing/mod.rs
@@ -19,7 +19,9 @@
  * If not, see <https://www.gnu.org/licenses/>.
  */
 
-use std::fmt::Display;
+use std::{borrow::Cow, fmt::Display};
+
+use regex::Regex;
 
 use self::{error::SpannedLexingError, tokenizer::Tokenizer};
 
@@ -36,13 +38,26 @@ pub struct TokenStream {
 }
 
 impl TokenStream {
+    /// Try to remove syntax sugar by applying regex matching to the input string
+    pub fn replace(src: &str) -> Cow<'_, str> {
+        // vim regex
+        // :%s/\v^(\s*)\/\/\/(|[^/].*)$/\1#[doc = r#"\2"#]
+        let re = Regex::new(r"(?m)^(?<space>\s*)///(?<content>|[^/].*)$").unwrap();
+
+        // Replace all doc comments with their attribute
+        let src_new = re.replace_all(src, r##"$space#[doc = r#"$content"#]"##);
+        src_new
+    }
+
     /// Turn a string of valid Trixy code into a list of tokens, including the
     /// location of that token's start and end point in the original source code.
     ///
     /// Note the token indices represent the half-open interval `[start, end)`,
     /// equivalent to `start .. end` in Rust.
     pub fn lex(src: &str) -> Result<Self, SpannedLexingError> {
-        let mut tokenizer = Tokenizer::new(src);
+        let src = Self::replace(src);
+
+        let mut tokenizer = Tokenizer::new(&src);
         let mut tokens = Vec::new();
 
         while let Some(tok) = tokenizer.next_token()? {
@@ -57,7 +72,7 @@ impl TokenStream {
 
         Ok(Self {
             tokens,
-            original_file: src.to_owned(),
+            original_file: src.to_string(),
        })
    }
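
For reference, the doc-comment desugaring that TokenStream::replace performs can be tried in isolation. The following is a minimal, standalone sketch (not part of the patch) that applies the same regex; it assumes the regex crate (1.9 or newer for the `(?<name>...)` capture syntax) and uses a hypothetical `greet` function as sample input.

use regex::Regex;

fn main() {
    // Same transformation as the patch's TokenStream::replace:
    // rewrite `/// ...` doc comments into `#[doc = r#"..."#]` attributes,
    // keeping each comment line's leading indentation.
    let re = Regex::new(r"(?m)^(?<space>\s*)///(?<content>|[^/].*)$").unwrap();

    // Hypothetical sample input, not taken from the trixy sources.
    let input = "/// Greets the user\nfn greet();\n";

    let output = re.replace_all(input, r##"$space#[doc = r#"$content"#]"##);
    println!("{}", output);
    // Prints:
    // #[doc = r#" Greets the user"#]
    // fn greet();
}

Because replace_all returns a Cow<str>, input containing no `///` lines is passed through as a borrow without an extra allocation, which keeps this pre-lexing pass cheap for sources that already use attribute syntax.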