fix(parser/tokenizer): Stop the tokenizer from dying when it encounters doc comments

Previously, the tokenizer failed badly when lexing raw string literals.
These errors have been fixed. One issue remains: empty doc comments
are serialized in C in a _weird_ way. Consecutive doc comments are emitted
as separate comment blocks, but they should be merged into one.
This commit is contained in:
Benedikt Peetz 2024-03-27 08:07:29 +01:00
parent 00a82ac54b
commit 508dc2bc46
Signed by: bpeetz
GPG Key ID: A5E94010C3A642AD
13 changed files with 342 additions and 37 deletions

View File

@ -37,6 +37,8 @@ pub enum LexingError {
ExpectedArrow,
#[error("The Comment token must start with two slashes")]
ExpectedComment,
#[error("It seams like, you have a un-closed quote")]
RunawayQuote,
}
impl AdditionalHelp for LexingError {
@ -49,6 +51,7 @@ impl AdditionalHelp for LexingError {
format!("This char: `{char}`; is not a valid token")
},
LexingError::ExpectedComment => "The '/' started comment parsing, but I could not find a matching '/'".to_owned(),
LexingError::RunawayQuote => "Add a quote, somewhere.".to_owned(),
}
}
}

View File

@ -207,15 +207,17 @@ fn try_to_tokenize_raw_literal_string(text: &str) -> Result<(TokenKind, usize),
// remove the 'r' at the beginning
let text_without_r = &text[1..];
let next_char = &text_without_r[0..1];
let next_char = &text_without_r[..1];
if next_char == "#" {
// The string is also escaped, count the hashtags
let (delimeter, chars_read) = take_while(text_without_r, |ch| ch == '#')?;
let delimeter = format!("\"{}", delimeter);
let (token, length) = tokenize_literal_string(&text_without_r[chars_read..], &delimeter)?;
// The 1 is the size of the extra 'r'
Ok((token, length + 1))
Ok((token, length + 1 + chars_read))
} else if next_char == "\"" {
// regular raw string literal
let (token, length) = tokenize_literal_string(text_without_r, "\"")?;
@ -238,6 +240,7 @@ fn tokenize_literal_string(text: &str, delimeter: &str) -> Result<(TokenKind, us
// &text[..1 + delimeter.len()]
// );
// eprintln!("Next up to parse: '{}'\n", &text[1 + delimeter.len()..20]);
// The literal string does not contain anything
Ok((TokenKind::StringLiteral("".to_owned()), 1 + delimeter.len()))
} else {
@ -251,11 +254,8 @@ fn tokenize_literal_string(text: &str, delimeter: &str) -> Result<(TokenKind, us
let (literal, chars_read) =
take_until_succesive_match(text_without_quote, &mut predicates)?;
// The second number read here is the last quote
Ok((
TokenKind::StringLiteral(literal.to_owned()),
chars_read + 1 + delimeter.len(),
))
// The + 1 is the quote taken at the beginning
Ok((TokenKind::StringLiteral(literal.to_owned()), chars_read + 1))
}
}
@ -329,35 +329,59 @@ where
assert!(!preds.is_empty(), "Predicates need to be provided");
let mut current_index = 0;
let mut current_predicate_char_index = 0;
'outer: for ch in data.chars() {
let should_stop = preds[0](ch);
current_index += ch.len_utf8();
if should_stop {
current_predicate_char_index = current_index;
// eprintln!("First predicate did match char: {:#?}", ch);
if preds.len() == 1 {
current_predicate_char_index += ch.len_utf8();
// eprintln!("Only one predicate provided, which matched: {:#?}\n", ch);
break 'outer;
}
'inner: for pred in &mut preds[1..] {
let ch = &data.chars().nth(current_index).expect("This should exists");
'inner: for predicate_index in 1..preds.len() {
let preds_len = preds.len();
let pred = &mut preds[predicate_index];
current_predicate_char_index += ch.len_utf8();
let ch = &data
.chars()
.nth(current_predicate_char_index)
.expect("This should exists");
// eprintln!("Checking pred with char: {:#?}", ch);
if pred(*ch) {
// eprintln!("Predicate did match char: {:#?}\n", ch);
if pred(*ch) && predicate_index == preds_len - 1 {
// eprintln!("Predicate did match char and was last: {:#?}\n", ch);
// TODO(@soispha): Why is this needed? <2024-03-26>
current_predicate_char_index += ch.len_utf8();
break 'outer;
}
} else if pred(*ch) {
// eprintln!("Predicate did match char, but was not last: {:#?}\n", ch);
continue;
} else {
// eprintln!("Predicate did not match char: {:#?}\n", ch);
current_index += ch.len_utf8();
break 'inner;
}
}
}
current_index += ch.len_utf8();
}
if current_index == 0 {
Err(LexingError::NoMatchesTaken)
} else if current_index > current_predicate_char_index {
Err(LexingError::RunawayQuote)
} else {
Ok((&data[..current_index], current_index))
Ok((
&data[..current_index],
current_index + (current_predicate_char_index - current_index),
))
}
}

View File

@ -7,23 +7,18 @@ File path: `out/dir/api.rs`
/* Rust API */
#[derive(Debug)]
pub enum Commands {
#[allow(non_camel_case_types)]
print { message: String },
Trinitrix(trinitrix::Trinitrix),
}
/// Attribute doc comment
pub mod trinitrix {
#[derive(Debug)]
pub enum Trinitrix {
/// Attribute doc comment, but very ##" "## "# " escaped
#[allow(non_camel_case_types)]
hi { trixy_output: trixy::oneshot::Sender<trixy::types::String>, name: String },
}
}
/* C API */
#[no_mangle]
pub extern "C" fn print(message: String) -> core::ffi::c_int {
callback_function(print);
return 1;
}
pub mod trinitrix_c {}
#[no_mangle]
pub extern "C" fn trinitrix_hi(
@ -58,10 +53,14 @@ File path: `dist/interface.h`
#include "string.h"
#include "vec.h"
extern int print (const char *message);
/**
Attribute doc comment, but very ##" "## "# " escaped
*/
extern int trinitrix_hi (const char **trixy_output, const char *name);
/**
Attribute doc comment
*/
struct trinitrix
{
int (*hi) (const char **, const char *);

View File

@ -0,0 +1,75 @@
# Host files
File path: `out/dir/api.rs`
```rust
// Host code
/* Rust API */
#[derive(Debug)]
pub enum Commands {
Trinitrix(trinitrix::Trinitrix),
}
/// First doc comment
pub mod trinitrix {
#[derive(Debug)]
pub enum Trinitrix {
/// Second doc comment
#[allow(non_camel_case_types)]
hi { trixy_output: trixy::oneshot::Sender<trixy::types::String>, name: String },
}
}
/* C API */
pub mod trinitrix_c {}
#[no_mangle]
pub extern "C" fn trinitrix_hi(
output: *mut trixy::types::String,
name: String,
) -> core::ffi::c_int {
let output_val: trixy::types::String = {
let (tx, rx) = trixy::oneshot::channel();
callback_function(trinitrix_hi);
let recv = rx
.recv()
.expect("The channel should not be closed until this value is received");
recv.into()
};
unsafe {
std::ptr::write(output, output_val);
}
return 1;
}
// vim: filetype=rust
```
# Auxiliary files
File path: `dist/interface.h`
```c
#if !defined TRIXY_MAIN_HEADER
#define TRIXY_MAIN_HEADER
#include "errno.h"
#include "string.h"
#include "vec.h"
/**
Second doc comment
*/
extern int trinitrix_hi (const char **trixy_output, const char *name);
/**
First doc comment
*/
struct trinitrix
{
int (*hi) (const char **, const char *);
};
const struct trinitrix trinitrix = {
.hi = trinitrix_hi,
};
#endif // if !defined TRIXY_MAIN_HEADER
// vim: filetype=c
```

View File

@ -25,8 +25,6 @@
mod trinitrix {
// another normal comment
/// Second doc comment
#[doc = "Attribute doc comment"]
#[doc = r###"Attribute doc comment, but very ##" "## "# " escaped"###]
fn hi(name: String) -> String;
}

View File

@ -0,0 +1,53 @@
# Host files
File path: `out/dir/api.rs`
```rust
// Host code
/* Rust API */
#[derive(Debug)]
pub enum Commands {
Test(test::Test),
}
/// an empty doc comment:
///
/// doc comment continued.
pub mod test {
#[derive(Debug)]
pub enum Test {}
}
/* C API */
pub mod test_c {}
// vim: filetype=rust
```
# Auxiliary files
File path: `dist/interface.h`
```c
#if !defined TRIXY_MAIN_HEADER
#define TRIXY_MAIN_HEADER
#include "errno.h"
#include "string.h"
#include "vec.h"
/**
an empty doc comment:
*/
/**
*/
/**
doc comment continued.
*/
struct test
{
};
const struct test test = {};
#endif // if !defined TRIXY_MAIN_HEADER
// vim: filetype=c
```

View File

@ -24,13 +24,13 @@ use pretty_assertions::assert_eq;
use trixy::macros::config::{file_tree::FileTree, trixy::TrixyConfig};
#[test]
pub fn empty() {
pub fn empty_doc_comment() {
let input = include_str!("./expected.md");
let expected: FileTree = input.parse().unwrap();
let config = TrixyConfig::new("callback_function")
.out_dir_path("out/dir")
.trixy_path("./tests/empty/input.tri")
.trixy_path("./tests/empty_doc_comment/input.tri")
.dist_dir_path("dist")
.add_c_headers(false);

View File

@ -0,0 +1,81 @@
# Host files
File path: `out/dir/api.rs`
```rust
// Host code
/* Rust API */
#[derive(Debug)]
pub enum Commands {
Trinitrix(trinitrix::Trinitrix),
}
/**I
contain
multiple
lines*/
pub mod trinitrix {
#[derive(Debug)]
pub enum Trinitrix {
///I \n also \n contain \n them
#[allow(non_camel_case_types)]
hi { trixy_output: trixy::oneshot::Sender<trixy::types::String>, name: String },
}
}
/* C API */
pub mod trinitrix_c {}
#[no_mangle]
pub extern "C" fn trinitrix_hi(
output: *mut trixy::types::String,
name: String,
) -> core::ffi::c_int {
let output_val: trixy::types::String = {
let (tx, rx) = trixy::oneshot::channel();
callback_function(trinitrix_hi);
let recv = rx
.recv()
.expect("The channel should not be closed until this value is received");
recv.into()
};
unsafe {
std::ptr::write(output, output_val);
}
return 1;
}
// vim: filetype=rust
```
# Auxiliary files
File path: `dist/interface.h`
```c
#if !defined TRIXY_MAIN_HEADER
#define TRIXY_MAIN_HEADER
#include "errno.h"
#include "string.h"
#include "vec.h"
/**
I \n also \n contain \n them
*/
extern int trinitrix_hi (const char **trixy_output, const char *name);
/**
I
contain
multiple
lines
*/
struct trinitrix
{
int (*hi) (const char **, const char *);
};
const struct trinitrix trinitrix = {
.hi = trinitrix_hi,
};
#endif // if !defined TRIXY_MAIN_HEADER
// vim: filetype=c
```

View File

@ -0,0 +1,33 @@
/*
* Copyright (C) 2023 - 2024:
* The Trinitrix Project <soispha@vhack.eu, antifallobst@systemausfall.org>
* SPDX-License-Identifier: LGPL-3.0-or-later
*
* This file is part of the Trixy crate for Trinitrix.
*
* Trixy is free software: you can redistribute it and/or modify
* it under the terms of the Lesser GNU General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* and the Lesser GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*/
#[doc = "I
contain
multiple
lines"]
mod trinitrix {
#[doc = r#"I \n also \n contain \n them"#]
fn hi(name: String) -> String;
}
// Trixy is sort of a subset of rust
// vim: syntax=rust

View File

@ -0,0 +1,39 @@
/*
* Copyright (C) 2023 - 2024:
* The Trinitrix Project <soispha@vhack.eu, antifallobst@systemausfall.org>
* SPDX-License-Identifier: LGPL-3.0-or-later
*
* This file is part of the Trixy crate for Trinitrix.
*
* Trixy is free software: you can redistribute it and/or modify
* it under the terms of the Lesser GNU General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* and the Lesser GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*/
use pretty_assertions::assert_eq;
use trixy::macros::config::{file_tree::FileTree, trixy::TrixyConfig};
/// Builds a `TrixyConfig` for `./tests/multi_line_doc_comment/input.tri`, runs
/// `generate()` on it, and asserts that the result equals the `FileTree` parsed
/// from the `expected.md` file that sits next to this test.
#[test]
pub fn multi_line_doc_comment() {
    // The expected tree is embedded at compile time and parsed before anything is
    // generated, so a malformed fixture panics here rather than in the comparison.
    let expected = include_str!("./expected.md")
        .parse::<FileTree>()
        .unwrap();

    let actual = TrixyConfig::new("callback_function")
        .out_dir_path("out/dir")
        .trixy_path("./tests/multi_line_doc_comment/input.tri")
        .dist_dir_path("dist")
        .add_c_headers(false)
        .generate();

    assert_eq!(expected, actual);
}