fix(parser/lexing/take_until_sucessive_match): Add utf8 support
This commit is contained in:
parent
9233a136ee
commit
fae3de8b49
|
@ -328,17 +328,22 @@ where
|
||||||
{
|
{
|
||||||
assert!(!preds.is_empty(), "Predicates need to be provided");
|
assert!(!preds.is_empty(), "Predicates need to be provided");
|
||||||
|
|
||||||
let mut current_index = 0;
|
let mut current_byte_index = 0;
|
||||||
let mut current_predicate_char_index = 0;
|
let mut current_char_index = 0;
|
||||||
|
let mut current_predicate_byte_index = 0;
|
||||||
|
let mut current_predicate_char_index;
|
||||||
|
|
||||||
'outer: for ch in data.chars() {
|
'outer: for ch in data.chars() {
|
||||||
|
// eprintln!("Processing: {:#?}", ch);
|
||||||
let should_stop = preds[0](ch);
|
let should_stop = preds[0](ch);
|
||||||
|
|
||||||
if should_stop {
|
if should_stop {
|
||||||
current_predicate_char_index = current_index;
|
current_predicate_byte_index = current_byte_index;
|
||||||
|
current_predicate_char_index = current_char_index;
|
||||||
|
|
||||||
// eprintln!("First predicate did match char: {:#?}", ch);
|
// eprintln!("First predicate did match char: {:#?}", ch);
|
||||||
if preds.len() == 1 {
|
if preds.len() == 1 {
|
||||||
current_predicate_char_index += ch.len_utf8();
|
current_predicate_byte_index += ch.len_utf8();
|
||||||
// eprintln!("Only one predicate provided, which matched: {:#?}\n", ch);
|
// eprintln!("Only one predicate provided, which matched: {:#?}\n", ch);
|
||||||
break 'outer;
|
break 'outer;
|
||||||
}
|
}
|
||||||
|
@ -346,21 +351,22 @@ where
|
||||||
'inner: for predicate_index in 1..preds.len() {
|
'inner: for predicate_index in 1..preds.len() {
|
||||||
let preds_len = preds.len();
|
let preds_len = preds.len();
|
||||||
let pred = &mut preds[predicate_index];
|
let pred = &mut preds[predicate_index];
|
||||||
current_predicate_char_index += ch.len_utf8();
|
current_predicate_byte_index += ch.len_utf8();
|
||||||
|
current_predicate_char_index += 1;
|
||||||
|
|
||||||
let ch = &data
|
let ch: char = data
|
||||||
.chars()
|
.chars()
|
||||||
.nth(current_predicate_char_index)
|
.nth(current_predicate_char_index)
|
||||||
.expect("This should exists");
|
.expect("This should always exist");
|
||||||
|
|
||||||
// eprintln!("Checking pred with char: {:#?}", ch);
|
// eprintln!("Checking pred with char: {:#?}", ch);
|
||||||
if pred(*ch) && predicate_index == preds_len - 1 {
|
if pred(ch) && predicate_index == preds_len - 1 {
|
||||||
// eprintln!("Predicate did match char and was last: {:#?}\n", ch);
|
// eprintln!("Predicate did match char and was last: {:#?}\n", ch);
|
||||||
|
|
||||||
// TODO(@soispha): Why is this needed? <2024-03-26>
|
// TODO(@soispha): Why is this needed? <2024-03-26>
|
||||||
current_predicate_char_index += ch.len_utf8();
|
current_predicate_byte_index += ch.len_utf8();
|
||||||
break 'outer;
|
break 'outer;
|
||||||
} else if pred(*ch) {
|
} else if pred(ch) {
|
||||||
// eprintln!("Predicate did match char, but was not last: {:#?}\n", ch);
|
// eprintln!("Predicate did match char, but was not last: {:#?}\n", ch);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
|
@ -370,17 +376,18 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
current_index += ch.len_utf8();
|
current_byte_index += ch.len_utf8();
|
||||||
|
current_char_index += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if current_index == 0 {
|
if current_byte_index == 0 {
|
||||||
Err(LexingError::NoMatchesTaken)
|
Err(LexingError::NoMatchesTaken)
|
||||||
} else if current_index > current_predicate_char_index {
|
} else if current_byte_index > current_predicate_byte_index {
|
||||||
Err(LexingError::RunawayQuote)
|
Err(LexingError::RunawayQuote)
|
||||||
} else {
|
} else {
|
||||||
Ok((
|
Ok((
|
||||||
&data[..current_index],
|
&data[..current_byte_index],
|
||||||
current_index + (current_predicate_char_index - current_index),
|
current_byte_index + (current_predicate_byte_index - current_byte_index),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
# Host files
|
||||||
|
|
||||||
|
File path: `out/dir/api.rs`
|
||||||
|
|
||||||
|
```rust
|
||||||
|
// Host code
|
||||||
|
/* Rust API */
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum Commands {
|
||||||
|
Trinitrix(trinitrix::Trinitrix),
|
||||||
|
}
|
||||||
|
/// [...]
|
||||||
|
/// "å🙂" => "å" then "🙂" (full Unicode support!)
|
||||||
|
/// [...]
|
||||||
|
pub mod trinitrix {
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum Trinitrix {}
|
||||||
|
}
|
||||||
|
/* C API */
|
||||||
|
pub mod trinitrix_c {}
|
||||||
|
// vim: filetype=rust
|
||||||
|
```
|
||||||
|
|
||||||
|
# Auxiliary files
|
||||||
|
|
||||||
|
File path: `dist/interface.h`
|
||||||
|
|
||||||
|
```c
|
||||||
|
#if !defined TRIXY_MAIN_HEADER
|
||||||
|
#define TRIXY_MAIN_HEADER
|
||||||
|
|
||||||
|
#include "errno.h"
|
||||||
|
#include "string.h"
|
||||||
|
#include "vec.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
[...]
|
||||||
|
*/
|
||||||
|
/**
|
||||||
|
"å🙂" => "å" then "🙂" (full Unicode support!)
|
||||||
|
*/
|
||||||
|
/**
|
||||||
|
[...]
|
||||||
|
*/
|
||||||
|
struct trinitrix
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
const struct trinitrix trinitrix = {};
|
||||||
|
|
||||||
|
#endif // if !defined TRIXY_MAIN_HEADER
|
||||||
|
// vim: filetype=c
|
||||||
|
```
|
|
@ -0,0 +1,29 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2023 - 2024:
|
||||||
|
* The Trinitrix Project <soispha@vhack.eu, antifallobst@systemausfall.org>
|
||||||
|
* SPDX-License-Identifier: LGPL-3.0-or-later
|
||||||
|
*
|
||||||
|
* This file is part of the Trixy crate for Trinitrix.
|
||||||
|
*
|
||||||
|
* Trixy is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the Lesser GNU General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of
|
||||||
|
* the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* and the Lesser GNU General Public License along with this program.
|
||||||
|
* If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#[doc = r#" [...]"#]
|
||||||
|
#[doc = r#" "å🙂" => "å" then "🙂" (full Unicode support!)"#]
|
||||||
|
#[doc = r#" [...]"#]
|
||||||
|
mod trinitrix {}
|
||||||
|
|
||||||
|
// Trixy is sort of a subset of Rust
|
||||||
|
// vim: syntax=rust
|
|
@ -0,0 +1,39 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2023 - 2024:
|
||||||
|
* The Trinitrix Project <soispha@vhack.eu, antifallobst@systemausfall.org>
|
||||||
|
* SPDX-License-Identifier: LGPL-3.0-or-later
|
||||||
|
*
|
||||||
|
* This file is part of the Trixy crate for Trinitrix.
|
||||||
|
*
|
||||||
|
* Trixy is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the Lesser GNU General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of
|
||||||
|
* the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* and the Lesser GNU General Public License along with this program.
|
||||||
|
* If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use pretty_assertions::assert_eq;
|
||||||
|
use trixy::macros::config::{file_tree::FileTree, trixy::TrixyConfig};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn utf8_in_doc_comment() {
|
||||||
|
let input = include_str!("./expected.md");
|
||||||
|
let expected: FileTree = input.parse().unwrap();
|
||||||
|
|
||||||
|
let config = TrixyConfig::new("callback_function")
|
||||||
|
.out_dir_path("out/dir")
|
||||||
|
.trixy_path("./tests/utf8_in_doc_comment/input.tri")
|
||||||
|
.dist_dir_path("dist")
|
||||||
|
.add_c_headers(false);
|
||||||
|
|
||||||
|
let actual = config.generate();
|
||||||
|
assert_eq!(expected, actual);
|
||||||
|
}
|
Reference in New Issue