89 lines
3.3 KiB
C
89 lines
3.3 KiB
C
|
|
||
|
#ifndef REDUCTOR_TEXT_H
|
||
|
#define REDUCTOR_TEXT_H
|
||
|
|
||
|
#include <stdbool.h>
|
||
|
#include <stdint.h>
|
||
|
|
||
|
typedef uint32_t rune_t;
|
||
|
|
||
|
/// @brief Gets the length of the UTF-8 rune at a given offset.
|
||
|
/// @param source Source to get the bytes from.
|
||
|
/// @param len_source Length of 'source' in bytes.
|
||
|
/// @param offset Offset to read from; doesn't have to be the start of the UTF-8 character.
|
||
|
/// @return Length of the rune or zero on error.
|
||
|
uint32_t tred_get_utf8_rune_length(
|
||
|
const char *source,
|
||
|
uint32_t len_source,
|
||
|
uint32_t offset);
|
||
|
|
||
|
/// @brief Extracts a UTF-8 character at some offset.
|
||
|
/// @warning The length isn't provided by this function; it must be obtained using
|
||
|
/// 'tred_get_utf8_rune_length'.
|
||
|
/// @param source Source string to get the bytes from.
|
||
|
/// @param len_source Length of 'source' in bytes.
|
||
|
/// @param offset Offset to read from; doesn't have to be the start of the UTF-8 character.
|
||
|
/// @return Rune read from 'source' at 'offset'.
|
||
|
rune_t tred_extract_utf8 (
|
||
|
const char *source,
|
||
|
uint32_t len_source,
|
||
|
uint32_t offset);
|
||
|
|
||
|
/// @brief Gets an ASCII character out of a source string, performing boundary checks.
|
||
|
/// @param source Source to read from
|
||
|
/// @param len_source Length of source to read from
|
||
|
/// @param offset Offset to read at
|
||
|
/// @return Rune read from 'source' at 'offset'.
|
||
|
rune_t tred_extract_ascii (
|
||
|
const char *source,
|
||
|
uint32_t len_source,
|
||
|
uint32_t offset);
|
||
|
|
||
|
/// @brief Presumably gets the byte length of a UTF-8 sequence from its head byte.
/// @note NOTE(review): this description is inferred from the name and signature only;
///       the implementation is not visible from this header -- confirm against it.
/// @param head Presumably the leading (first) byte of a UTF-8 sequence.
/// @return Presumably the sequence length in bytes. The signed return type suggests a
///         negative value may signal an invalid head byte -- TODO confirm.
int32_t tred_get_utf8_length_from_head(uint8_t head);
|
||
|
|
||
|
/// @brief Presumably locates the head (first) byte of the UTF-8 character that
///        contains the byte at 'offset'.
/// @note NOTE(review): this description is inferred from the name and signature only;
///       the implementation is not visible from this header -- confirm against it.
/// @param source Source string to read the bytes from.
/// @param len_source Length of 'source' in bytes.
/// @param offset Offset to start from; presumably may point into the middle of a character.
/// @return Presumably the offset of the head byte. The signed return type suggests a
///         negative value may signal an error -- TODO confirm.
int32_t tred_get_utf8_head_offset(
|
||
|
const char *source,
|
||
|
uint32_t len_source,
|
||
|
uint32_t offset);
|
||
|
|
||
|
|
||
|
|
||
|
/// @brief Tests whether a rune is a blank (whitespace or tab) rune.
|
||
|
/// @param rune Rune to test for being blank.
|
||
|
/// @return 'true' if 'rune' is a blank character, 'false' otherwise.
|
||
|
bool tred_rune_is_blank(rune_t rune);
|
||
|
|
||
|
/// @brief Tests a rune_t for being an uppercase letter.
|
||
|
/// @param rune Rune to test for being an uppercase Latin letter.
|
||
|
/// @return 'true' if 'rune' is an uppercase Latin letter, 'false' otherwise.
|
||
|
bool tred_rune_is_uppercase(rune_t rune);
|
||
|
|
||
|
/// @brief Tests a rune_t for being a lowercase letter.
|
||
|
/// @param rune Rune to test for being a lowercase Latin letter.
|
||
|
/// @return 'true' if 'rune' is a lowercase Latin letter, 'false' otherwise.
|
||
|
bool tred_rune_is_lowercase(rune_t rune);
|
||
|
|
||
|
/// @brief Tests a rune_t for being either lowercase or uppercase.
|
||
|
/// @param rune Rune to test for being a Latin letter.
|
||
|
/// @return 'true' if 'rune' is either lowercase or uppercase, 'false' otherwise.
|
||
|
bool tred_rune_is_alphabetic(rune_t rune);
|
||
|
|
||
|
/// @brief Tests whether a rune_t is a digit from 0 to 9.
|
||
|
/// @param rune Rune to test for being a digit.
|
||
|
/// @return 'true' if 'rune' is a digit from 0 to 9, 'false' otherwise.
|
||
|
bool tred_rune_is_numeric(rune_t rune);
|
||
|
|
||
|
/// @brief Tests whether a rune_t is either a digit from 0 to 9
|
||
|
/// or a letter (either lowercase or uppercase).
|
||
|
/// @param rune Rune to test for being alphanumeric
|
||
|
/// @return 'true' if 'rune' is alphanumeric, 'false' otherwise.
|
||
|
bool tred_rune_is_alphanumeric(rune_t rune);
|
||
|
|
||
|
/// @brief Tests whether a rune_t is a sign of one of the four ASCII sign ranges.
|
||
|
/// @param rune Rune to test for being a sign.
|
||
|
/// @return 'true' if 'rune' is from either of the four ASCII sign ranges, 'false' otherwise.
|
||
|
bool tred_rune_is_sign(rune_t rune);
|
||
|
/// @brief Presumably tests whether a rune_t is a control character.
/// @note NOTE(review): the exact set of runes treated as control characters is not
///       visible from this header -- confirm against the implementation.
/// @param rune Rune to test for being a control character.
/// @return 'true' if 'rune' is considered a control character, 'false' otherwise.
bool tred_rune_is_control(rune_t rune);
|
||
|
|
||
|
#endif // REDUCTOR_TEXT_H
|