Removed extraction-functions for specific rune types and added checking-only functions for runes.

2023-12-31 08:16:55 +01:00 · 2023-12-31 08:16:55 +01:00 · 6e156438c1
parent 6d7f2ba701
commit 6e156438c1
2 changed files with 95 additions and 199 deletions
--- a/core/exports/librr/runes.h
+++ b/core/exports/librr/runes.h
@ -4,115 +4,14 @@

 #include <librr/types.h>

-/// @brief 
-/// @param string 
-/// @param offset 
-/// @param increase 
+/// @brief Extracts a UTF-8 rune at a given offset in a string and ADDS the length
+///  of the rune to the number pointed to by 'increase'.
+/// @param string The string to get the data from. For safety reasons, this should be null-terminated.
+/// @param offset The offset in the string to get the data from.
+/// @param increase A pointer to the number to which the length of the rune will be added.
 /// @return The UTF-8 character which was extracted OR 0 is the function failed.
 rune_t rr_extract_utf8(const char *string, usz_t offset, usz_t *increase);

-/// @brief Extracts a lowercase letter at some offset in an UTF-8 - string.
-///  The function adds the rest length of the found lowercase letter to an
-///  integer to which a pointer was given.
-/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value.
-/// @attention Not the full length of the rune but rather the rest length,
-///  the length starting from the reading offset going to the end of the rune
-///  will be added onto 'advance'.
-/// @param string Null-terminated UTF-8 (or ASCII) string from which the
-///  rune should be extracted. This will not be modified.
-/// @param offset Offset from the start of the string at which the rune will
-///  be read. This doesn't have to point to the start of the rune; the start
-///  will be found.
-/// @param advance How many bytes 'offset' must be advanced to get to the first
-///  byte of the next rune. This will NOT be set if the rune is not a lowercase letter.
-/// @return The rune which was extracted or ZERO if the found rune is not a lowercase letter.
-rune_t rr_extract_lower(const char *string, usz_t offset, usz_t *advance);
-
-/// @brief Extracts an uppercase letter at some offset in an UTF-8 - string.
-///  The function adds the rest length of the found uppercase letter to an
-///  integer to which a pointer was given.
-/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value.
-/// @attention Not the full length of the rune but rather the rest length,
-///  the length starting from the reading offset going to the end of the rune
-///  will be added onto 'advance'.
-/// @param string Null-terminated UTF-8 (or ASCII) string from which the
-///  rune should be extracted. This will not be modified.
-/// @param offset Offset from the start of the string at which the rune will
-///  be read. This doesn't have to point to the start of the rune; the start
-///  will be found.
-/// @param advance How many bytes 'offset' must be advanced to get to the first
-///  byte of the next rune. This will NOT be set if the rune is not an uppercase letter.
-/// @return The rune which was extracted or ZERO if the found rune is not an uppercase letter.
-rune_t rr_extract_upper(const char *string, usz_t offset, usz_t *advance);
-
-/// @brief Extracts a letter at some offset in an UTF-8 - string.
-///  The function adds the rest length of the found letter to an
-///  integer to which a pointer was given.
-/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value.
-/// @attention Not the full length of the rune but rather the rest length,
-///  the length starting from the reading offset going to the end of the rune
-///  will be added onto 'advance'.
-/// @param string Null-terminated UTF-8 (or ASCII) string from which the
-///  rune should be extracted. This will not be modified.
-/// @param offset Offset from the start of the string at which the rune will
-///  be read. This doesn't have to point to the start of the rune; the start
-///  will be found.
-/// @param advance How many bytes 'offset' must be advanced to get to the first
-///  byte of the next rune. This will NOT be set if the rune is not a letter.
-/// @return The rune which was extracted or ZERO if the found rune is not a letter.
-rune_t rr_extract_letter(const char *string, usz_t offset, usz_t *advance);
-
-/// @brief Extracts a digit at some offset in an UTF-8 - string.
-///  The function adds the rest length of the found digit to an
-///  integer to which a pointer was given.
-/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value.
-/// @attention Not the full length of the rune but rather the rest length,
-///  the length starting from the reading offset going to the end of the rune
-///  will be added onto 'advance'.
-/// @param string Null-terminated UTF-8 (or ASCII) string from which the
-///  rune should be extracted. This will not be modified.
-/// @param offset Offset from the start of the string at which the rune will
-///  be read. This doesn't have to point to the start of the rune; the start
-///  will be found.
-/// @param advance How many bytes 'offset' must be advanced to get to the first
-///  byte of the next rune. This will NOT be set if the rune is not a digit.
-/// @return The rune which was extracted or ZERO if the found rune is not a digit.
-rune_t rr_extract_digit(const char *string, usz_t offset, usz_t *advance);
-
-/// @brief Extracts an alphanumeric rune at some offset in an UTF-8 - string.
-///  The function adds the rest length of the alphanumeric rune found to an
-///  integer to which a pointer was given.
-/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value.
-/// @attention Not the full length of the rune but rather the rest length,
-///  the length starting from the reading offset going to the end of the rune
-///  will be added onto 'advance'.
-/// @param string Null-terminated UTF-8 (or ASCII) string from which the
-///  rune should be extracted. This will not be modified.
-/// @param offset Offset from the start of the string at which the rune will
-///  be read. This doesn't have to point to the start of the rune; the start
-///  will be found.
-/// @param advance How many bytes 'offset' must be advanced to get to the first
-///  byte of the next rune. This will NOT be set if the rune is not an alphanumeric rune.
-/// @return The rune which was extracted or ZERO if the found rune is not alphanumeric.
-rune_t rr_extract_alphanumeric(const char *string, usz_t offset, usz_t *advance);
-
-/// @brief Extracts a special sign (such as slash, at, the hash sign, etc.)
-///  at some offset in an UTF-8 - string. The function adds the rest length
-///  of the found sign to an integer to which a pointer was given.
-/// @attention 'advance' will be ADDED TO, it won't be set to a comletely new value.
-/// @attention Not the full length of the rune but rather the rest length,
-///  the length starting from the reading offset going to the end of the rune
-///  will be added onto 'advance'.
-/// @param string Null-terminated UTF-8 (or ASCII) string from which the
-///  rune should be extracted. This will not be modified.
-/// @param offset Offset from the start of the string at which the rune will
-///  be read. This doesn't have to point to the start of the rune; the start
-///  will be found.
-/// @param advance How many bytes 'offset' must be advanced to get to the first
-///  byte of the next rune. This will NOT be set if the rune is not a sign.
-/// @return The rune which was extracted or ZERO if the found rune is not a sign.
-rune_t rr_extract_sign(const char *string, usz_t offset, usz_t *advance);
-
 /// @brief Checks if there is a newline delimiter at a specific offset in a string
 ///  and writes the offset right after it to a given pointer's destination.
 /// @attention The next rune's offset will replace the previous content of the given pointer,
@ -125,4 +24,29 @@ rune_t rr_extract_sign(const char *string, usz_t offset, usz_t *advance);
 /// @return TRUE if there is a newline at that point and FALSE if not.
 bool_t rr_check_newline(const char *string, usz_t offset, usz_t *next);

+/// @brief Checks if a rune is a lowercase ASCII letter ('a' to 'z').
+/// @param rune The rune to be checked.
+/// @return TRUE if the rune is a lowercase ASCII letter, FALSE otherwise.
+bool_t rr_rune_is_lower(rune_t rune);
+
+/// @brief Checks if a rune is an uppercase ASCII letter ('A' to 'Z').
+/// @param rune The rune to be checked.
+/// @return TRUE if the rune is an uppercase ASCII letter, FALSE otherwise.
+bool_t rr_rune_is_upper(rune_t rune);
+
+/// @brief Checks if a rune is an ASCII letter (lowercase 'a' to 'z' or uppercase 'A' to 'Z').
+/// @param rune The rune to be checked.
+/// @return TRUE if the rune is an ASCII letter, FALSE otherwise.
+bool_t rr_rune_is_letter(rune_t rune);
+
+/// @brief Checks if a rune is an ASCII digit ('0' to '9').
+/// @param rune The rune to be checked.
+/// @return TRUE if the rune is an ASCII digit, FALSE otherwise.
+bool_t rr_rune_is_digit(rune_t rune);
+
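+/// @brief Checks if a rune is in one of the four ASCII sign ranges ('!' to '/', ':' to '@', '[' to '`', '{' to '~'); the space character is not included.
+/// @param rune The rune to be checked.
+/// @return TRUE if the rune is in one of the four ASCII sign ranges, FALSE otherwise.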
+bool_t rr_rune_is_ascii_special(rune_t rune);
+
 #endif // LIBRR_RUNES_H
--- a/core/src-c/runes.c
+++ b/core/src-c/runes.c
@ -89,99 +89,6 @@ rune_t rr_extract_utf8(const char *string, usz_t offset, usz_t *increase)
    return rr_postprocess_utf8_bytes(&string[offset], rune_length);
 }

-rune_t rr_extract_lower(const char *string, usz_t offset, usz_t *increase)
-{
-    usz_t                       increase_backup                 = *increase;
-    rune_t                      subject                         = rr_extract_utf8(string, offset, &increase_backup);
-
-    if(subject < 'a') return ZERO;
-    if(subject > 'z') return ZERO;
-
-    (*increase)                     = increase_backup;
-    return subject;
-}
-
-rune_t rr_extract_upper(const char *string, usz_t offset, usz_t *increase)
-{
-    usz_t                       increase_backup                 = *increase;
-    rune_t                      subject                         = rr_extract_utf8(string, offset, &increase_backup);
-
-    if(subject < 'A') return ZERO;
-    if(subject > 'Z') return ZERO;
-
-    (*increase)                     = increase_backup;
-    return subject;
-}
-
-rune_t rr_extract_letter(const char *string, usz_t offset, usz_t *increase)
-{
-    rune_t                      subject;
-    if((subject = rr_extract_lower(string, offset, increase)) != ZERO) return subject;
-    if((subject = rr_extract_upper(string, offset, increase)) != ZERO) return subject;
-    return ZERO;
-}
-
-rune_t rr_extract_digit(const char *string, usz_t offset, usz_t *increase)
-{
-    usz_t                       increase_backup                 = *increase;
-    rune_t                      subject                         = rr_extract_utf8(string, offset, &increase_backup);
-
-    if(subject < '0') return ZERO;
-    if(subject > '9') return ZERO;
-
-    (*increase)                     = increase_backup;
-    return subject;
-}
-
-rune_t rr_extract_alphanumeric(const char *string, usz_t offset, usz_t *increase)
-{
-    rune_t                      subject;
-    if((subject = rr_extract_lower(string, offset, increase)) != ZERO) return subject;
-    if((subject = rr_extract_upper(string, offset, increase)) != ZERO) return subject;
-    if((subject = rr_extract_digit(string, offset, increase)) != ZERO) return subject;
-    return ZERO;
-}
-
-bool_t rr_is_rune_of_sign_block_1(rune_t rune)
-{
-    if(rune < 0x20) return FALSE;
-    if(rune > 0x2f) return FALSE;
-    return TRUE;
-}
-
-bool_t rr_is_rune_of_sign_block_2(rune_t rune)
-{
-    if(rune < 0x3a) return FALSE;
-    if(rune > 0x40) return FALSE;
-    return TRUE;
-}
-
-bool_t rr_is_rune_of_sign_block_3(rune_t rune)
-{
-    if(rune < 0x5b) return FALSE;
-    if(rune > 0x60) return FALSE;
-    return TRUE;
-}
-
-bool_t rr_is_rune_of_sign_block_4(rune_t rune)
-{
-    if(rune < 0x7b) return FALSE;
-    if(rune > 0x7e) return FALSE;
-    return TRUE;
-}
-
-rune_t rr_extract_sign(const char *string, usz_t offset, usz_t *increase)
-{
-    usz_t                       increase_backup                 = *increase;
-    rune_t                      subject                         = rr_extract_utf8(string, offset, &increase_backup);
-
-    if(rr_is_rune_of_sign_block_1(subject)) return subject;
-    if(rr_is_rune_of_sign_block_2(subject)) return subject;
-    if(rr_is_rune_of_sign_block_3(subject)) return subject;
-    if(rr_is_rune_of_sign_block_4(subject)) return subject;
-    return ZERO;
-}
-
 bool_t rr_check_newline(const char *string, usz_t offset, usz_t *next)
 {
    rune_t                      subject                         = rr_extract_utf8(string, offset, &offset);
@ -199,3 +106,68 @@ bool_t rr_check_newline(const char *string, usz_t offset, usz_t *next)
    }
    return FALSE;
 }
+
+/// @brief Checks whether a rune lies in the range 'a' to 'z' (both ends inclusive).
+bool_t rr_rune_is_lower(rune_t rune)
+{
+    if((rune >= 'a') && (rune <= 'z')) return TRUE;
+    return FALSE;
+}
+
+/// @brief Checks whether a rune lies in the range 'A' to 'Z' (both ends inclusive).
+bool_t rr_rune_is_upper(rune_t rune)
+{
+    if((rune >= 'A') && (rune <= 'Z')) return TRUE;
+    return FALSE;
+}
+
+/// @brief Checks whether a rune passes the lowercase check or the uppercase check.
+bool_t rr_rune_is_letter(rune_t rune)
+{
+    if(rr_rune_is_lower(rune) || rr_rune_is_upper(rune)) return TRUE;
+    return FALSE;
+}
+
+/// @brief Checks whether a rune lies in the range '0' to '9' (both ends inclusive).
+bool_t rr_rune_is_digit(rune_t rune)
+{
+    if((rune >= '0') && (rune <= '9')) return TRUE;
+    return FALSE;
+}
+
+/// @brief First special-sign block: '!' to '/' inclusive (the space before '!' is excluded).
+bool_t rr_rune_is_in_ascii_special_block_1(rune_t rune)
+{
+    if((rune >= '!') && (rune <= '/')) return TRUE;
+    return FALSE;
+}
+
+/// @brief Second special-sign block: ':' to '@' inclusive.
+bool_t rr_rune_is_in_ascii_special_block_2(rune_t rune)
+{
+    if((rune >= ':') && (rune <= '@')) return TRUE;
+    return FALSE;
+}
+
+/// @brief Third special-sign block: '[' to '`' inclusive.
+bool_t rr_rune_is_in_ascii_special_block_3(rune_t rune)
+{
+    if((rune >= '[') && (rune <= '`')) return TRUE;
+    return FALSE;
+}
+
+/// @brief Fourth special-sign block: '{' to '~' inclusive.
+bool_t rr_rune_is_in_ascii_special_block_4(rune_t rune)
+{
+    if((rune >= '{') && (rune <= '~')) return TRUE;
+    return FALSE;
+}
+
+/// @brief Checks whether a rune falls into any of the four special-sign blocks.
+///  The blocks are tested in ascending order and the first match ends the check.
+bool_t rr_rune_is_ascii_special(rune_t rune)
+{
+    if(rr_rune_is_in_ascii_special_block_1(rune) || rr_rune_is_in_ascii_special_block_2(rune)) return TRUE;
+    if(rr_rune_is_in_ascii_special_block_3(rune) || rr_rune_is_in_ascii_special_block_4(rune)) return TRUE;
+    return FALSE;
+}