From ad415bcafe9693b6fb51ab902ca63db373a1b318 Mon Sep 17 00:00:00 2001
From: Eric-Paul Ickhorn <ericp.ickhorn@gmail.com>
Date: Tue, 28 Nov 2023 20:27:20 +0100
Subject: [PATCH] Made tokenizer store lines and columns of tokens

---
 code/inc/tokenizer.h |  3 +++
 code/src/tokenizer.c | 53 +++++++++++++++++++++++++++++++++++---------
 2 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/code/inc/tokenizer.h b/code/inc/tokenizer.h
index e5677fe..08d8ee2 100644
--- a/code/inc/tokenizer.h
+++ b/code/inc/tokenizer.h
@@ -38,6 +38,9 @@ struct pac_token
     pac_token_e                 type;
     usz_t                       offset;
     usz_t                       length;
+    
+    usz_t                       line;
+    usz_t                       column;
 };
 
 struct pac_tlist
diff --git a/code/src/tokenizer.c b/code/src/tokenizer.c
index b275274..ca8ef3a 100644
--- a/code/src/tokenizer.c
+++ b/code/src/tokenizer.c
@@ -52,7 +52,10 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
     list.source                     = source;
     list.num_tokens                 = 0;
     list.tokens                     = calloc(sizeof(pac_token_s), tokens_capacity);
-    for(usz_t offset = 0; offset < len_source; ++offset)
+
+    usz_t                   line                    = 1;
+    usz_t                   column                  = 1;
+    for(usz_t offset = 0; offset < len_source; ++offset, ++column)
     {
         // The subject of the current iteration of this loop;
         // the rune which is being looked at
@@ -64,18 +67,31 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
             list.tokens                     = realloc(list.tokens, sizeof(pac_token_s) * tokens_capacity);
         }
 
+        if(subject == '\n')
+        {
+            ++line;
+            column                      = 0;        // The for()-header will make it 1.
+            continue;
+        }
+
         if(pac_rune_is_blank(subject))
             continue;
-        
+
         if(subject == '"')
         {
             usz_t                   start_offset            = offset;
+            usz_t                   start_line              = line;
+            usz_t                   start_column            = column;
             while(offset < len_source)
             {
                 if(subject == '\\')
+                {
                     ++offset;
-                
+                    ++column;
+                }
                 ++offset;
+                ++column;
+
                 subject                         = source[offset];
 
                 if(subject == '"')
@@ -85,6 +101,8 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
             token.type                      = PAC_TOKEN_LIT_STRING;
             token.offset                    = start_offset + 1;             // +1 for skipping the starting quotation mark
             token.length                    = (offset - start_offset) - 1;  // -1 for leaving the ending quotation mark out 
+            token.line                      = start_line;
+            token.column                    = start_column;
             
             list.tokens[list.num_tokens]    = token;
             ++list.num_tokens;
@@ -94,9 +112,12 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
         if(pac_rune_is_letter(subject) || (subject == '_'))
         {
             usz_t                   start_offset            = offset;
+            usz_t                   start_line              = line;
+            usz_t                   start_column            = column;
             while(offset < len_source)
             {
                 ++offset;
+                ++column;
                 subject                         = source[offset];
                 
                 if(!pac_rune_is_letter(subject) && !pac_rune_is_digit(subject) && (subject != '_'))
@@ -108,7 +129,10 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
             token.offset                    = start_offset;
             token.length                    = offset - start_offset;
             token.type                      = pac_convert_word_to_token_type(&source[start_offset], token.length);
+            token.line                      = start_line;
+            token.column                    = start_column;
             --offset; // The for() - header will skip to the character after the word.
+            --column;
             
             list.tokens[list.num_tokens]    = token;
             ++list.num_tokens;
@@ -117,8 +141,7 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
 
         if(subject == '#')
         {
-            usz_t                   offset_copy             = offset;
-            ++offset_copy;
+            usz_t                   offset_copy             = offset + 1;
 
             if(offset_copy < len_source)
             {
@@ -136,17 +159,23 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
                 }
                 else if(second_sign == '[')
                 {
-                    while(offset_copy < len_source)
+                    usz_t                   line_copy               = line;
+                    usz_t                   column_copy             = column + 1; // +1, because there also is one at
+                    while(offset_copy < len_source)                               // the creation of 'offset_copy'.
                     {
                         if(source[offset_copy] == ']')
                         {
+                            // TODO: There should be a check for whether there is one square bracket following this one.
                             break;
                         }
-                        // If there's an escaped character here, do one jump more
-                        if(source[offset_copy] == '\\')
-                            ++offset_copy;
+                        if(source[offset_copy] == '\n')
+                            ++line_copy;
+
                         ++offset_copy;
+                        ++column_copy;
                     }
+                    line                            = line_copy;
+                    column                          = column_copy;
                 }
                 offset                          = offset_copy;
             }
@@ -159,6 +188,8 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
             token.type                      = pac_convert_sign_to_token_type(subject);
             token.offset                    = offset;
             token.length                    = 1;
+            token.line                      = line;
+            token.column                    = column;
             
             list.tokens[list.num_tokens]    = token;
             ++list.num_tokens;
@@ -219,7 +250,9 @@ void pac_display_tlist(pac_tlist_s list)
 
         char                   *token_type_string   = pac_stringify_token_type(token.type);
         char                   *token_type_indent   = pac_create_spaces_for_indent(24 - strlen(token_type_string));
-        printf("[%s]:%s %s\n",
+        printf("Column %-3lu @ Line %lu:     [%s]%s %s\n",
+            token.column,
+            token.line,
             token_type_string,
             token_type_indent,
             content