fix(parser/lexing/take_until_sucessive_match): Add utf8 support

This commit is contained in:
Benedikt Peetz 2024-03-27 09:58:08 +01:00
parent 9233a136ee
commit fae3de8b49
Signed by: bpeetz
GPG Key ID: A5E94010C3A642AD
4 changed files with 143 additions and 15 deletions

View File

@ -328,17 +328,22 @@ where
{ {
assert!(!preds.is_empty(), "Predicates need to be provided"); assert!(!preds.is_empty(), "Predicates need to be provided");
let mut current_index = 0; let mut current_byte_index = 0;
let mut current_predicate_char_index = 0; let mut current_char_index = 0;
let mut current_predicate_byte_index = 0;
let mut current_predicate_char_index;
'outer: for ch in data.chars() { 'outer: for ch in data.chars() {
// eprintln!("Processing: {:#?}", ch);
let should_stop = preds[0](ch); let should_stop = preds[0](ch);
if should_stop { if should_stop {
current_predicate_char_index = current_index; current_predicate_byte_index = current_byte_index;
current_predicate_char_index = current_char_index;
// eprintln!("First predicate did match char: {:#?}", ch); // eprintln!("First predicate did match char: {:#?}", ch);
if preds.len() == 1 { if preds.len() == 1 {
current_predicate_char_index += ch.len_utf8(); current_predicate_byte_index += ch.len_utf8();
// eprintln!("Only one predicate provided, which matched: {:#?}\n", ch); // eprintln!("Only one predicate provided, which matched: {:#?}\n", ch);
break 'outer; break 'outer;
} }
@ -346,21 +351,22 @@ where
'inner: for predicate_index in 1..preds.len() { 'inner: for predicate_index in 1..preds.len() {
let preds_len = preds.len(); let preds_len = preds.len();
let pred = &mut preds[predicate_index]; let pred = &mut preds[predicate_index];
current_predicate_char_index += ch.len_utf8(); current_predicate_byte_index += ch.len_utf8();
current_predicate_char_index += 1;
let ch = &data let ch: char = data
.chars() .chars()
.nth(current_predicate_char_index) .nth(current_predicate_char_index)
.expect("This should exists"); .expect("This should always exist");
// eprintln!("Checking pred with char: {:#?}", ch); // eprintln!("Checking pred with char: {:#?}", ch);
if pred(*ch) && predicate_index == preds_len - 1 { if pred(ch) && predicate_index == preds_len - 1 {
// eprintln!("Predicate did match char and was last: {:#?}\n", ch); // eprintln!("Predicate did match char and was last: {:#?}\n", ch);
// TODO(@soispha): Why is this needed? <2024-03-26> // TODO(@soispha): Why is this needed? <2024-03-26>
current_predicate_char_index += ch.len_utf8(); current_predicate_byte_index += ch.len_utf8();
break 'outer; break 'outer;
} else if pred(*ch) { } else if pred(ch) {
// eprintln!("Predicate did match char, but was not last: {:#?}\n", ch); // eprintln!("Predicate did match char, but was not last: {:#?}\n", ch);
continue; continue;
} else { } else {
@ -370,17 +376,18 @@ where
} }
} }
current_index += ch.len_utf8(); current_byte_index += ch.len_utf8();
current_char_index += 1;
} }
if current_index == 0 { if current_byte_index == 0 {
Err(LexingError::NoMatchesTaken) Err(LexingError::NoMatchesTaken)
} else if current_index > current_predicate_char_index { } else if current_byte_index > current_predicate_byte_index {
Err(LexingError::RunawayQuote) Err(LexingError::RunawayQuote)
} else { } else {
Ok(( Ok((
&data[..current_index], &data[..current_byte_index],
current_index + (current_predicate_char_index - current_index), current_byte_index + (current_predicate_byte_index - current_byte_index),
)) ))
} }
} }

View File

@ -0,0 +1,53 @@
# Host files
File path: `out/dir/api.rs`
```rust
// Host code
/* Rust API */
#[derive(Debug)]
pub enum Commands {
Trinitrix(trinitrix::Trinitrix),
}
/// [...]
/// "å🙂" => "å" then "🙂" (full Unicode support!)
/// [...]
pub mod trinitrix {
#[derive(Debug)]
pub enum Trinitrix {}
}
/* C API */
pub mod trinitrix_c {}
// vim: filetype=rust
```
# Auxiliary files
File path: `dist/interface.h`
```c
#if !defined TRIXY_MAIN_HEADER
#define TRIXY_MAIN_HEADER
#include "errno.h"
#include "string.h"
#include "vec.h"
/**
[...]
*/
/**
"å🙂" => "å" then "🙂" (full Unicode support!)
*/
/**
[...]
*/
struct trinitrix
{
};
const struct trinitrix trinitrix = {};
#endif // if !defined TRIXY_MAIN_HEADER
// vim: filetype=c
```

View File

@ -0,0 +1,29 @@
/*
* Copyright (C) 2023 - 2024:
* The Trinitrix Project <soispha@vhack.eu, antifallobst@systemausfall.org>
* SPDX-License-Identifier: LGPL-3.0-or-later
*
* This file is part of the Trixy crate for Trinitrix.
*
* Trixy is free software: you can redistribute it and/or modify
* it under the terms of the Lesser GNU General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* and the Lesser GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*/
// Fixture module: the body is empty, so the only content is the doc attributes
// below. The middle attribute carries multi-byte UTF-8 characters ("å" and "🙂").
// NOTE(review): presumably this is the `input.tri` read by the
// `utf8_in_doc_comment` test, and the attribute text must stay in sync with the
// expected output — confirm before editing the strings.
#[doc = r#" [...]"#]
#[doc = r#" "å🙂" => "å" then "🙂" (full Unicode support!)"#]
#[doc = r#" [...]"#]
mod trinitrix {}
// Trixy is sort of a subset of Rust
// vim: syntax=rust

View File

@ -0,0 +1,39 @@
/*
* Copyright (C) 2023 - 2024:
* The Trinitrix Project <soispha@vhack.eu, antifallobst@systemausfall.org>
* SPDX-License-Identifier: LGPL-3.0-or-later
*
* This file is part of the Trixy crate for Trinitrix.
*
* Trixy is free software: you can redistribute it and/or modify
* it under the terms of the Lesser GNU General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* and the Lesser GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*/
use pretty_assertions::assert_eq;
use trixy::macros::config::{file_tree::FileTree, trixy::TrixyConfig};
/// Generates code from `./tests/utf8_in_doc_comment/input.tri` and checks the
/// result against the file tree parsed from `expected.md`.
///
/// # Panics
///
/// Panics if `expected.md` cannot be parsed into a [`FileTree`], or if the
/// generated tree differs from the expected one.
#[test]
pub fn utf8_in_doc_comment() {
    // The expected tree is embedded at compile time and parsed before any
    // generation takes place.
    let expected = include_str!("./expected.md")
        .parse::<FileTree>()
        .unwrap();

    // Configure and run the generator in a single builder chain.
    let actual = TrixyConfig::new("callback_function")
        .out_dir_path("out/dir")
        .trixy_path("./tests/utf8_in_doc_comment/input.tri")
        .dist_dir_path("dist")
        .add_c_headers(false)
        .generate();

    assert_eq!(expected, actual);
}