fix(parser/lexing/take_until_sucessive_match): Add utf8 support
This commit is contained in:
parent
9233a136ee
commit
fae3de8b49
|
@ -328,17 +328,22 @@ where
|
|||
{
|
||||
assert!(!preds.is_empty(), "Predicates need to be provided");
|
||||
|
||||
let mut current_index = 0;
|
||||
let mut current_predicate_char_index = 0;
|
||||
let mut current_byte_index = 0;
|
||||
let mut current_char_index = 0;
|
||||
let mut current_predicate_byte_index = 0;
|
||||
let mut current_predicate_char_index;
|
||||
|
||||
'outer: for ch in data.chars() {
|
||||
// eprintln!("Processing: {:#?}", ch);
|
||||
let should_stop = preds[0](ch);
|
||||
|
||||
if should_stop {
|
||||
current_predicate_char_index = current_index;
|
||||
current_predicate_byte_index = current_byte_index;
|
||||
current_predicate_char_index = current_char_index;
|
||||
|
||||
// eprintln!("First predicate did match char: {:#?}", ch);
|
||||
if preds.len() == 1 {
|
||||
current_predicate_char_index += ch.len_utf8();
|
||||
current_predicate_byte_index += ch.len_utf8();
|
||||
// eprintln!("Only one predicate provided, which matched: {:#?}\n", ch);
|
||||
break 'outer;
|
||||
}
|
||||
|
@ -346,21 +351,22 @@ where
|
|||
'inner: for predicate_index in 1..preds.len() {
|
||||
let preds_len = preds.len();
|
||||
let pred = &mut preds[predicate_index];
|
||||
current_predicate_char_index += ch.len_utf8();
|
||||
current_predicate_byte_index += ch.len_utf8();
|
||||
current_predicate_char_index += 1;
|
||||
|
||||
let ch = &data
|
||||
let ch: char = data
|
||||
.chars()
|
||||
.nth(current_predicate_char_index)
|
||||
.expect("This should exists");
|
||||
.expect("This should always exist");
|
||||
|
||||
// eprintln!("Checking pred with char: {:#?}", ch);
|
||||
if pred(*ch) && predicate_index == preds_len - 1 {
|
||||
if pred(ch) && predicate_index == preds_len - 1 {
|
||||
// eprintln!("Predicate did match char and was last: {:#?}\n", ch);
|
||||
|
||||
// TODO(@soispha): Why is this needed? <2024-03-26>
|
||||
current_predicate_char_index += ch.len_utf8();
|
||||
current_predicate_byte_index += ch.len_utf8();
|
||||
break 'outer;
|
||||
} else if pred(*ch) {
|
||||
} else if pred(ch) {
|
||||
// eprintln!("Predicate did match char, but was not last: {:#?}\n", ch);
|
||||
continue;
|
||||
} else {
|
||||
|
@ -370,17 +376,18 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
current_index += ch.len_utf8();
|
||||
current_byte_index += ch.len_utf8();
|
||||
current_char_index += 1;
|
||||
}
|
||||
|
||||
if current_index == 0 {
|
||||
if current_byte_index == 0 {
|
||||
Err(LexingError::NoMatchesTaken)
|
||||
} else if current_index > current_predicate_char_index {
|
||||
} else if current_byte_index > current_predicate_byte_index {
|
||||
Err(LexingError::RunawayQuote)
|
||||
} else {
|
||||
Ok((
|
||||
&data[..current_index],
|
||||
current_index + (current_predicate_char_index - current_index),
|
||||
&data[..current_byte_index],
|
||||
current_byte_index + (current_predicate_byte_index - current_byte_index),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
# Host files
|
||||
|
||||
File path: `out/dir/api.rs`
|
||||
|
||||
```rust
|
||||
// Host code
|
||||
/* Rust API */
|
||||
#[derive(Debug)]
|
||||
pub enum Commands {
|
||||
Trinitrix(trinitrix::Trinitrix),
|
||||
}
|
||||
/// [...]
|
||||
/// "å🙂" => "å" then "🙂" (full Unicode support!)
|
||||
/// [...]
|
||||
pub mod trinitrix {
|
||||
#[derive(Debug)]
|
||||
pub enum Trinitrix {}
|
||||
}
|
||||
/* C API */
|
||||
pub mod trinitrix_c {}
|
||||
// vim: filetype=rust
|
||||
```
|
||||
|
||||
# Auxiliary files
|
||||
|
||||
File path: `dist/interface.h`
|
||||
|
||||
```c
|
||||
#if !defined TRIXY_MAIN_HEADER
|
||||
#define TRIXY_MAIN_HEADER
|
||||
|
||||
#include "errno.h"
|
||||
#include "string.h"
|
||||
#include "vec.h"
|
||||
|
||||
/**
|
||||
[...]
|
||||
*/
|
||||
/**
|
||||
"å🙂" => "å" then "🙂" (full Unicode support!)
|
||||
*/
|
||||
/**
|
||||
[...]
|
||||
*/
|
||||
struct trinitrix
|
||||
{
|
||||
};
|
||||
|
||||
const struct trinitrix trinitrix = {};
|
||||
|
||||
#endif // if !defined TRIXY_MAIN_HEADER
|
||||
// vim: filetype=c
|
||||
```
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Copyright (C) 2023 - 2024:
|
||||
* The Trinitrix Project <soispha@vhack.eu, antifallobst@systemausfall.org>
|
||||
* SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
*
|
||||
* This file is part of the Trixy crate for Trinitrix.
|
||||
*
|
||||
* Trixy is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Lesser GNU General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* and the Lesser GNU General Public License along with this program.
|
||||
* If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#[doc = r#" [...]"#]
|
||||
#[doc = r#" "å🙂" => "å" then "🙂" (full Unicode support!)"#]
|
||||
#[doc = r#" [...]"#]
|
||||
mod trinitrix {}
|
||||
|
||||
// Trixy is sort of a subset of rust
|
||||
// vim: syntax=rust
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (C) 2023 - 2024:
|
||||
* The Trinitrix Project <soispha@vhack.eu, antifallobst@systemausfall.org>
|
||||
* SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
*
|
||||
* This file is part of the Trixy crate for Trinitrix.
|
||||
*
|
||||
* Trixy is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Lesser GNU General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* and the Lesser GNU General Public License along with this program.
|
||||
* If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
use pretty_assertions::assert_eq;
|
||||
use trixy::macros::config::{file_tree::FileTree, trixy::TrixyConfig};
|
||||
|
||||
/// Golden-file test: runs the Trixy generator on
/// `./tests/utf8_in_doc_comment/input.tri` and checks that the result equals
/// the file tree described in `expected.md`.
///
/// NOTE(review): judging by the test name, the input is presumably a Trixy
/// file whose doc comments contain multi-byte UTF-8 characters — confirm
/// against `input.tri`.
///
/// # Panics
///
/// Panics if `expected.md` cannot be parsed into a `FileTree`, or if the
/// generated output differs from the expected one.
#[test]
|
||||
pub fn utf8_in_doc_comment() {
|
||||
// Embed the expected output at compile time; the path is resolved relative
// to this source file.
let input = include_str!("./expected.md");
|
||||
// Parse the expected output into a `FileTree`; a malformed fixture fails
// the test right here via `unwrap`.
let expected: FileTree = input.parse().unwrap();
|
||||
|
||||
// Build the generator configuration.
// NOTE(review): "callback_function" is presumably the name of the callback
// the generated code refers to — verify against `TrixyConfig::new`.
let config = TrixyConfig::new("callback_function")
|
||||
// Same directory that `expected.md` lists for the host file (`out/dir/api.rs`).
.out_dir_path("out/dir")
|
||||
// NOTE(review): unlike `include_str!`, this path looks relative to the
// working directory of the test run (presumably the crate root) — confirm.
.trixy_path("./tests/utf8_in_doc_comment/input.tri")
|
||||
// Same directory that `expected.md` lists for the header (`dist/interface.h`).
.dist_dir_path("dist")
|
||||
.add_c_headers(false);
|
||||
|
||||
// Run the generator and compare its result with the expected file tree.
let actual = config.generate();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
Reference in New Issue