feat(parser/lexing): Desugar doc comments by running a regex on the file
Previously we actually supported parsing doc comments (`///`), but replacing them before parsing allows for simplifications in the lexer. Concretely, this means we can add support for attributes without having to maintain a separate doc comment parser.
This commit is contained in:
parent
918ab5df6d
commit
bf3eb61110
|
@ -33,8 +33,17 @@ pub struct Args {
|
|||
/// The subcommand to execute
|
||||
pub subcommand: Command,
|
||||
}
|
||||
|
||||
#[derive(Subcommand, Debug)]
|
||||
pub enum Command {
|
||||
#[clap(value_parser)]
|
||||
/// Only replace the regex replacements in the file
|
||||
Replace {
|
||||
#[clap(value_parser)]
|
||||
/// The file containing the trixy code to replace
|
||||
file: PathBuf,
|
||||
},
|
||||
|
||||
#[clap(value_parser)]
|
||||
/// Only try to tokenize the file
|
||||
Tokenize {
|
||||
|
@ -125,7 +134,7 @@ pub fn main() {
|
|||
let processed = match parsed.process(input) {
|
||||
Ok(ok) => ok,
|
||||
Err(err) => {
|
||||
eprintln!("Error while doing the seconde (checked) parsing run:");
|
||||
eprintln!("Error while doing the second (checked) parsing run:");
|
||||
eprintln!("{}", err);
|
||||
exit(1)
|
||||
}
|
||||
|
@ -140,5 +149,11 @@ pub fn main() {
|
|||
});
|
||||
println!("{:#?}", parsed);
|
||||
}
|
||||
|
||||
Command::Replace { file } => {
|
||||
let input = fs::read_to_string(file).unwrap();
|
||||
let parsed = TokenStream::replace(&input);
|
||||
println!("{}", parsed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
* If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
use std::fmt::Display;
|
||||
use std::{borrow::Cow, fmt::Display};
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use self::{error::SpannedLexingError, tokenizer::Tokenizer};
|
||||
|
||||
|
@ -36,13 +38,26 @@ pub struct TokenStream {
|
|||
}
|
||||
|
||||
impl TokenStream {
|
||||
/// Try to remove syntax sugar by applying regex matching to the input string
|
||||
pub fn replace(src: &str) -> Cow<str> {
|
||||
// vim regex
|
||||
// :%s/\v^(\s*)\/\/\/(|[^/].*)$/\1#[doc = r#"\2"#]
|
||||
let re = Regex::new(r"(?m)^(?<space>\s*)///(?<content>|[^/].*)$").unwrap();
|
||||
|
||||
// Replace all doc comments with their attribute
|
||||
let src_new = re.replace_all(src, r##"$space#[doc = r#"$content"#]"##);
|
||||
src_new
|
||||
}
|
||||
|
||||
/// Turn a string of valid Trixy code into a list of tokens, including the
|
||||
/// location of that token's start and end point in the original source code.
|
||||
///
|
||||
/// Note the token indices represent the half-open interval `[start, end)`,
|
||||
/// equivalent to `start .. end` in Rust.
|
||||
pub fn lex(src: &str) -> Result<Self, SpannedLexingError> {
|
||||
let mut tokenizer = Tokenizer::new(src);
|
||||
let src = Self::replace(src);
|
||||
|
||||
let mut tokenizer = Tokenizer::new(&src);
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
while let Some(tok) = tokenizer.next_token()? {
|
||||
|
@ -57,7 +72,7 @@ impl TokenStream {
|
|||
|
||||
Ok(Self {
|
||||
tokens,
|
||||
original_file: src.to_owned(),
|
||||
original_file: src.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
Reference in New Issue