Reductor/inc-c/reductor/internals/utility/text.h

89 lines
3.3 KiB
C
Raw Normal View History

#ifndef REDUCTOR_TEXT_H
#define REDUCTOR_TEXT_H
#include <stdbool.h>
#include <stdint.h>
/// @brief Type used throughout this header to hold a single rune.
/// @note Defined as a 32-bit unsigned integer; the extraction functions declared in this
/// header return their result in this type.
typedef uint32_t rune_t;
/// @brief Gets the length of the UTF-8 rune at a given offset.
/// @note 'tred_extract_utf8' does not report the length of the rune it reads; its
/// documentation names this function as the way to obtain that length.
/// @param source Source to get the bytes from.
/// @param len_source Length of 'source' in bytes.
/// @param offset Offset to read from; doesn't have to be the start of the UTF-8 character.
/// @return Length of the rune, or zero on error.
uint32_t tred_get_utf8_rune_length(
const char *source,
uint32_t len_source,
uint32_t offset);
/// @brief Extracts the UTF-8 character at some offset.
/// @warning The length of the rune isn't provided by this function; it has to be obtained
/// separately using 'tred_get_utf8_rune_length'.
/// @param source Source string to get the bytes from.
/// @param len_source Length of 'source' in bytes.
/// @param offset Offset to read from; doesn't have to be the start of the UTF-8 character.
/// @return Rune read from 'source' at 'offset'.
/// NOTE(review): no error value is documented here (unlike 'tred_get_utf8_rune_length',
/// which returns zero on error) - confirm what is returned for an invalid sequence or an
/// out-of-range offset.
rune_t tred_extract_utf8 (
const char *source,
uint32_t len_source,
uint32_t offset);
/// @brief Gets an ASCII character out of a source string, performing boundary checks.
/// @param source Source to read from.
/// @param len_source Length of the source to read from.
/// @param offset Offset to read at.
/// @return Rune read from 'source' at 'offset'.
/// NOTE(review): the value returned when the boundary check fails is not documented
/// here - confirm against the implementation.
rune_t tred_extract_ascii (
const char *source,
uint32_t len_source,
uint32_t offset);
/// @brief NOTE(review): undocumented in this header. Judging by the name, this presumably
/// derives the length of a UTF-8 sequence from its leading ("head") byte - confirm against
/// the implementation.
/// @param head Single byte to inspect; presumably the first byte of a UTF-8 sequence.
/// @return Signed 32-bit result. The signed return type (where 'tred_get_utf8_rune_length'
/// returns an unsigned length) suggests a negative value may signal an invalid head
/// byte - TODO confirm.
int32_t tred_get_utf8_length_from_head(uint8_t head);
/// @brief NOTE(review): undocumented in this header. Judging by the name, this presumably
/// locates the head (first) byte of the UTF-8 character that covers 'offset' - confirm
/// against the implementation.
/// @param source Source to get the bytes from.
/// @param len_source Length of 'source'; presumably in bytes, as for the other functions
/// in this header.
/// @param offset Offset to start from; presumably it may point into the middle of a
/// UTF-8 character - TODO confirm.
/// @return Signed 32-bit result. Whether this is an absolute offset or one relative to
/// 'offset', and whether a negative value signals an error, is not stated here - TODO
/// confirm.
int32_t tred_get_utf8_head_offset(
const char *source,
uint32_t len_source,
uint32_t offset);
/// @brief Tests whether a rune is a blank (whitespace or tab) rune.
/// NOTE(review): "whitespace" here presumably means the space character, since a tab is
/// itself whitespace - confirm against the implementation.
/// @param rune Rune to test for being blank.
/// @return 'true' if 'rune' is a blank character, 'false' otherwise.
bool tred_rune_is_blank(rune_t rune);
/// @brief Tests whether a rune_t is an uppercase Latin letter.
/// @param rune Rune to test for being an uppercase Latin letter.
/// @return 'true' if 'rune' is an uppercase Latin letter, 'false' otherwise.
bool tred_rune_is_uppercase(rune_t rune);
/// @brief Tests whether a rune_t is a lowercase Latin letter.
/// @param rune Rune to test for being a lowercase Latin letter.
/// @return 'true' if 'rune' is a lowercase Latin letter, 'false' otherwise.
bool tred_rune_is_lowercase(rune_t rune);
/// @brief Tests whether a rune_t is a Latin letter, either lowercase or uppercase.
/// @param rune Rune to test for being a Latin letter.
/// @return 'true' if 'rune' is either lowercase or uppercase, 'false' otherwise.
bool tred_rune_is_alphabetic(rune_t rune);
/// @brief Tests whether a rune_t is a digit from 0 to 9.
/// @param rune Rune to test for being a digit.
/// @return 'true' if 'rune' is a digit from 0 to 9, 'false' otherwise.
bool tred_rune_is_numeric(rune_t rune);
/// @brief Tests whether a rune_t is either a digit from 0 to 9
/// or a letter (either lowercase or uppercase).
/// @param rune Rune to test for being alphanumeric.
/// @return 'true' if 'rune' is alphanumeric, 'false' otherwise.
bool tred_rune_is_alphanumeric(rune_t rune);
/// @brief Tests whether a rune_t falls within one of the four ASCII sign ranges.
/// NOTE(review): the four ranges are not enumerated in this header - see the
/// implementation for their exact bounds.
/// @param rune Rune to test for being a sign.
/// @return 'true' if 'rune' is from any of the four ASCII sign ranges, 'false' otherwise.
bool tred_rune_is_sign(rune_t rune);
/// @brief NOTE(review): undocumented in this header. Judging by the name, this presumably
/// tests whether a rune is a control character; which code points count as control is
/// not stated here - confirm against the implementation.
/// @param rune Rune to test.
/// @return Boolean result of the test; presumably 'true' if 'rune' is a control
/// character and 'false' otherwise, matching the other predicates in this header.
bool tred_rune_is_control(rune_t rune);
#endif // REDUCTOR_TEXT_H