diff --git a/core/exports/librr/runes.h b/core/exports/librr/runes.h index 7f7f91a..e9d1ce4 100644 --- a/core/exports/librr/runes.h +++ b/core/exports/librr/runes.h @@ -4,115 +4,14 @@ #include -/// @brief -/// @param string -/// @param offset -/// @param increase +/// @brief Extracts a UTF-8 rune at a given offset in a string and ADDS the length +/// of the rune to the number pointed to by 'increase'. +/// @param string The string to get the data from. For safety reasons, this should be null-terminated. +/// @param offset The offset in the string to get the data from. +/// @param increase A pointer to the number to which the length of the rune will be added. /// @return The UTF-8 character which was extracted OR 0 is the function failed. rune_t rr_extract_utf8(const char *string, usz_t offset, usz_t *increase); -/// @brief Extracts a lowercase letter at some offset in an UTF-8 - string. -/// The function adds the rest length of the found lowercase letter to an -/// integer to which a pointer was given. -/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value. -/// @attention Not the full length of the rune but rather the rest length, -/// the length starting from the reading offset going to the end of the rune -/// will be added onto 'advance'. -/// @param string Null-terminated UTF-8 (or ASCII) string from which the -/// rune should be extracted. This will not be modified. -/// @param offset Offset from the start of the string at which the rune will -/// be read. This doesn't have to point to the start of the rune; the start -/// will be found. -/// @param advance How many bytes 'offset' must be advanced to get to the first -/// byte of the next rune. This will NOT be set if the rune is not a lowercase letter. -/// @return The rune which was extracted or ZERO if the found rune is not a lowercase letter. 
-rune_t rr_extract_lower(const char *string, usz_t offset, usz_t *advance); - -/// @brief Extracts an uppercase letter at some offset in an UTF-8 - string. -/// The function adds the rest length of the found uppercase letter to an -/// integer to which a pointer was given. -/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value. -/// @attention Not the full length of the rune but rather the rest length, -/// the length starting from the reading offset going to the end of the rune -/// will be added onto 'advance'. -/// @param string Null-terminated UTF-8 (or ASCII) string from which the -/// rune should be extracted. This will not be modified. -/// @param offset Offset from the start of the string at which the rune will -/// be read. This doesn't have to point to the start of the rune; the start -/// will be found. -/// @param advance How many bytes 'offset' must be advanced to get to the first -/// byte of the next rune. This will NOT be set if the rune is not an uppercase letter. -/// @return The rune which was extracted or ZERO if the found rune is not an uppercase letter. -rune_t rr_extract_upper(const char *string, usz_t offset, usz_t *advance); - -/// @brief Extracts a letter at some offset in an UTF-8 - string. -/// The function adds the rest length of the found letter to an -/// integer to which a pointer was given. -/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value. -/// @attention Not the full length of the rune but rather the rest length, -/// the length starting from the reading offset going to the end of the rune -/// will be added onto 'advance'. -/// @param string Null-terminated UTF-8 (or ASCII) string from which the -/// rune should be extracted. This will not be modified. -/// @param offset Offset from the start of the string at which the rune will -/// be read. This doesn't have to point to the start of the rune; the start -/// will be found. 
-/// @param advance How many bytes 'offset' must be advanced to get to the first -/// byte of the next rune. This will NOT be set if the rune is not a letter. -/// @return The rune which was extracted or ZERO if the found rune is not a letter. -rune_t rr_extract_letter(const char *string, usz_t offset, usz_t *advance); - -/// @brief Extracts a digit at some offset in an UTF-8 - string. -/// The function adds the rest length of the found digit to an -/// integer to which a pointer was given. -/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value. -/// @attention Not the full length of the rune but rather the rest length, -/// the length starting from the reading offset going to the end of the rune -/// will be added onto 'advance'. -/// @param string Null-terminated UTF-8 (or ASCII) string from which the -/// rune should be extracted. This will not be modified. -/// @param offset Offset from the start of the string at which the rune will -/// be read. This doesn't have to point to the start of the rune; the start -/// will be found. -/// @param advance How many bytes 'offset' must be advanced to get to the first -/// byte of the next rune. This will NOT be set if the rune is not a digit. -/// @return The rune which was extracted or ZERO if the found rune is not a digit. -rune_t rr_extract_digit(const char *string, usz_t offset, usz_t *advance); - -/// @brief Extracts an alphanumeric rune at some offset in an UTF-8 - string. -/// The function adds the rest length of the alphanumeric rune found to an -/// integer to which a pointer was given. -/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value. -/// @attention Not the full length of the rune but rather the rest length, -/// the length starting from the reading offset going to the end of the rune -/// will be added onto 'advance'. -/// @param string Null-terminated UTF-8 (or ASCII) string from which the -/// rune should be extracted. 
This will not be modified. -/// @param offset Offset from the start of the string at which the rune will -/// be read. This doesn't have to point to the start of the rune; the start -/// will be found. -/// @param advance How many bytes 'offset' must be advanced to get to the first -/// byte of the next rune. This will NOT be set if the rune is not an alphanumeric rune. -/// @return The rune which was extracted or ZERO if the found rune is not alphanumeric. -rune_t rr_extract_alphanumeric(const char *string, usz_t offset, usz_t *advance); - -/// @brief Extracts a special sign (such as slash, at, the hash sign, etc.) -/// at some offset in an UTF-8 - string. The function adds the rest length -/// of the found sign to an integer to which a pointer was given. -/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value. -/// @attention Not the full length of the rune but rather the rest length, -/// the length starting from the reading offset going to the end of the rune -/// will be added onto 'advance'. -/// @param string Null-terminated UTF-8 (or ASCII) string from which the -/// rune should be extracted. This will not be modified. -/// @param offset Offset from the start of the string at which the rune will -/// be read. This doesn't have to point to the start of the rune; the start -/// will be found. -/// @param advance How many bytes 'offset' must be advanced to get to the first -/// byte of the next rune. This will NOT be set if the rune is not a sign. -/// @return The rune which was extracted or ZERO if the found rune is not a sign. -rune_t rr_extract_sign(const char *string, usz_t offset, usz_t *advance); - /// @brief Checks if there is a newline delimiter at a specific offset in a string /// and writes the offset right after it to a given pointer's destination. 
/// @attention The next rune's offset will replace the previous content of the given pointer, @@ -125,4 +24,29 @@ rune_t rr_extract_sign(const char *string, usz_t offset, usz_t *advance); /// @return TRUE if there is a newline at that point and FALSE if not. bool_t rr_check_newline(const char *string, usz_t offset, usz_t *next); +/// @brief Checks if a rune is a lowercase ASCII letter ('a' to 'z'). +/// @param rune The rune to be checked. +/// @return TRUE if the rune is a lowercase ASCII letter, FALSE otherwise. +bool_t rr_rune_is_lower(rune_t rune); + +/// @brief Checks if a rune is an uppercase ASCII letter ('A' to 'Z'). +/// @param rune The rune to be checked. +/// @return TRUE if the rune is an uppercase ASCII letter, FALSE otherwise. +bool_t rr_rune_is_upper(rune_t rune); + +/// @brief Checks if a rune is an ASCII letter, either lowercase ('a' to 'z') or uppercase ('A' to 'Z'). +/// @param rune The rune to be checked. +/// @return TRUE if the rune is an ASCII letter, FALSE otherwise. +bool_t rr_rune_is_letter(rune_t rune); + +/// @brief Checks if a rune is an ASCII digit ('0' to '9'). +/// @param rune The rune to be checked. +/// @return TRUE if the rune is an ASCII digit, FALSE otherwise. +bool_t rr_rune_is_digit(rune_t rune); + +/// @brief Checks if a rune lies in one of the four ASCII special-character ranges (0x21-0x2F, 0x3A-0x40, 0x5B-0x60 and 0x7B-0x7E); the space character (0x20) is not included. +/// @param rune The rune to be checked. +/// @return TRUE if the rune lies in one of the four ASCII special-character ranges, FALSE otherwise. 
+bool_t rr_rune_is_ascii_special(rune_t rune); + #endif // LIBRR_RUNES_H diff --git a/core/src-c/runes.c b/core/src-c/runes.c index 17d2da3..aea4930 100644 --- a/core/src-c/runes.c +++ b/core/src-c/runes.c @@ -89,99 +89,6 @@ rune_t rr_extract_utf8(const char *string, usz_t offset, usz_t *increase) return rr_postprocess_utf8_bytes(&string[offset], rune_length); } -rune_t rr_extract_lower(const char *string, usz_t offset, usz_t *increase) -{ - usz_t increase_backup = *increase; - rune_t subject = rr_extract_utf8(string, offset, &increase_backup); - - if(subject < 'a') return ZERO; - if(subject > 'z') return ZERO; - - (*increase) = increase_backup; - return subject; -} - -rune_t rr_extract_upper(const char *string, usz_t offset, usz_t *increase) -{ - usz_t increase_backup = *increase; - rune_t subject = rr_extract_utf8(string, offset, &increase_backup); - - if(subject < 'A') return ZERO; - if(subject > 'Z') return ZERO; - - (*increase) = increase_backup; - return subject; -} - -rune_t rr_extract_letter(const char *string, usz_t offset, usz_t *increase) -{ - rune_t subject; - if((subject = rr_extract_lower(string, offset, increase)) != ZERO) return subject; - if((subject = rr_extract_upper(string, offset, increase)) != ZERO) return subject; - return ZERO; -} - -rune_t rr_extract_digit(const char *string, usz_t offset, usz_t *increase) -{ - usz_t increase_backup = *increase; - rune_t subject = rr_extract_utf8(string, offset, &increase_backup); - - if(subject < '0') return ZERO; - if(subject > '9') return ZERO; - - (*increase) = increase_backup; - return subject; -} - -rune_t rr_extract_alphanumeric(const char *string, usz_t offset, usz_t *increase) -{ - rune_t subject; - if((subject = rr_extract_lower(string, offset, increase)) != ZERO) return subject; - if((subject = rr_extract_upper(string, offset, increase)) != ZERO) return subject; - if((subject = rr_extract_digit(string, offset, increase)) != ZERO) return subject; - return ZERO; -} - -bool_t 
rr_is_rune_of_sign_block_1(rune_t rune) -{ - if(rune < 0x20) return FALSE; - if(rune > 0x2f) return FALSE; - return TRUE; -} - -bool_t rr_is_rune_of_sign_block_2(rune_t rune) -{ - if(rune < 0x3a) return FALSE; - if(rune > 0x40) return FALSE; - return TRUE; -} - -bool_t rr_is_rune_of_sign_block_3(rune_t rune) -{ - if(rune < 0x5b) return FALSE; - if(rune > 0x60) return FALSE; - return TRUE; -} - -bool_t rr_is_rune_of_sign_block_4(rune_t rune) -{ - if(rune < 0x7b) return FALSE; - if(rune > 0x7e) return FALSE; - return TRUE; -} - -rune_t rr_extract_sign(const char *string, usz_t offset, usz_t *increase) -{ - usz_t increase_backup = *increase; - rune_t subject = rr_extract_utf8(string, offset, &increase_backup); - - if(rr_is_rune_of_sign_block_1(subject)) return subject; - if(rr_is_rune_of_sign_block_2(subject)) return subject; - if(rr_is_rune_of_sign_block_3(subject)) return subject; - if(rr_is_rune_of_sign_block_4(subject)) return subject; - return ZERO; -} - bool_t rr_check_newline(const char *string, usz_t offset, usz_t *next) { rune_t subject = rr_extract_utf8(string, offset, &offset); @@ -199,3 +106,68 @@ bool_t rr_check_newline(const char *string, usz_t offset, usz_t *next) } return FALSE; } + +bool_t rr_rune_is_lower(rune_t rune) +{ + if(rune < 'a') return FALSE; + if(rune > 'z') return FALSE; + return TRUE; +} + +bool_t rr_rune_is_upper(rune_t rune) +{ + if(rune < 'A') return FALSE; + if(rune > 'Z') return FALSE; + return TRUE; +} + +bool_t rr_rune_is_letter(rune_t rune) +{ + if(rr_rune_is_lower(rune)) return TRUE; + if(rr_rune_is_upper(rune)) return TRUE; + return FALSE; +} + +bool_t rr_rune_is_digit(rune_t rune) +{ + if(rune < '0') return FALSE; + if(rune > '9') return FALSE; + return TRUE; +} + +bool_t rr_rune_is_in_ascii_special_block_1(rune_t rune) +{ + if(rune < '!') return FALSE; + if(rune > '/') return FALSE; + return TRUE; +} + +bool_t rr_rune_is_in_ascii_special_block_2(rune_t rune) +{ + if(rune < ':') return FALSE; + if(rune > '@') return FALSE; + 
return TRUE; +} + +bool_t rr_rune_is_in_ascii_special_block_3(rune_t rune) +{ + if(rune < '[') return FALSE; + if(rune > '`') return FALSE; + return TRUE; +} + +bool_t rr_rune_is_in_ascii_special_block_4(rune_t rune) +{ + if(rune < '{') return FALSE; + if(rune > '~') return FALSE; + return TRUE; +} + +bool_t rr_rune_is_ascii_special(rune_t rune) +{ + if(rr_rune_is_in_ascii_special_block_1(rune)) return TRUE; + if(rr_rune_is_in_ascii_special_block_2(rune)) return TRUE; + if(rr_rune_is_in_ascii_special_block_3(rune)) return TRUE; + if(rr_rune_is_in_ascii_special_block_4(rune)) return TRUE; + return FALSE; +}