Implemented with Antigravity.

2026-05-26 19:21:36 +10:00
parent 873ac50dee
commit 2d90caa285
6 changed files with 1283 additions and 5 deletions
@@ -0,0 +1,216 @@
+#include "../Headers/scc_core.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+// Helper: Strip whitespace from both ends of a NUL-terminated string, in place.
+// The returned pointer points inside `str` (just past any leading whitespace);
+// a terminator is written right after the last non-whitespace character.
+static char* trim_whitespace(char* str) {
+    char* head = str;
+    for (; isspace((unsigned char)*head); head++) {
+        // advance past leading whitespace
+    }
+
+    size_t remaining = strlen(head);
+    if (remaining == 0) {
+        // Empty or all-whitespace input: nothing is written to the buffer.
+        return head;
+    }
+
+    // head[0] is non-whitespace here, so the backward scan stops at head at the latest.
+    char* tail = head + (remaining - 1);
+    for (; tail > head && isspace((unsigned char)*tail); tail--) {
+        // back up over trailing whitespace
+    }
+    tail[1] = '\0';
+    return head;
+}
+
+// Helper: Copy the next line of the buffer at *cursor into line_buf.
+//
+// A line ends at '\n', '\r', "\r\n" or the end of the string; the terminator is
+// consumed but not copied. At most max_len - 1 characters are stored and
+// line_buf is always NUL-terminated. Characters of an over-long line that do
+// not fit are discarded rather than being handed back as a separate "line" on
+// the next call, which also guarantees that every successful call advances
+// *cursor.
+//
+// Returns false (leaving *cursor untouched) when the buffer is exhausted or
+// when the arguments leave no room for even the terminator.
+static bool get_next_line(char** cursor, char* line_buf, int max_len) {
+    if (!cursor || !*cursor || !line_buf || max_len <= 0) return false;
+
+    char* c = *cursor;
+    if (*c == '\0') return false;
+
+    int idx = 0;
+    while (*c != '\0' && *c != '\n' && *c != '\r') {
+        if (idx < max_len - 1) {
+            line_buf[idx++] = *c;
+        }
+        // Always consume the character, even when it no longer fits.
+        c++;
+    }
+    line_buf[idx] = '\0';
+
+    // Skip newline characters
+    if (*c == '\r') c++;
+    if (*c == '\n') c++;
+
+    *cursor = c;
+    return true;
+}
+
+// Helper: Split a string into whitespace-separated tokens, respecting quotes.
+//
+// A token that starts with '"' or '\'' runs up to and including the matching
+// closing quote (or to the end of the string if it is never closed); the quote
+// characters are kept as part of the token. Any other token runs up to the
+// next whitespace character.
+//
+// On return *out_tokens is a malloc'ed array of *out_count malloc'ed strings
+// that the caller must free (each string, then the array). If the input has no
+// tokens, is NULL, or an allocation fails, *out_tokens is NULL and *out_count
+// is 0; nothing is leaked in the failure case.
+static void split_tokens(const char* str, char*** out_tokens, uint64_t* out_count) {
+    char** tokens = NULL;
+    uint64_t count = 0;
+    bool ok = true;
+
+    *out_tokens = NULL;
+    *out_count = 0;
+    if (!str) return;
+
+    const char* p = str;
+    while (*p != '\0') {
+        while (isspace((unsigned char)*p)) p++;
+        if (*p == '\0') break;
+
+        const char* start = p;
+        if (*p == '"' || *p == '\'') {
+            const char quote = *p++;
+            while (*p != '\0' && *p != quote) p++;
+            if (*p == quote) p++;
+        } else {
+            while (*p != '\0' && !isspace((unsigned char)*p)) p++;
+        }
+
+        size_t len = (size_t)(p - start);
+        char* token = (char*)malloc(len + 1);
+        if (!token) {
+            ok = false;
+            break;
+        }
+        memcpy(token, start, len);
+        token[len] = '\0';
+
+        // Grow through a temporary so the existing array survives a failed realloc.
+        char** grown = (char**)realloc(tokens, (size_t)(count + 1) * sizeof(char*));
+        if (!grown) {
+            free(token);
+            ok = false;
+            break;
+        }
+        tokens = grown;
+        tokens[count++] = token;
+    }
+
+    if (!ok) {
+        for (uint64_t i = 0; i < count; i++) {
+            free(tokens[i]);
+        }
+        free(tokens);
+        return;
+    }
+
+    *out_tokens = tokens;
+    *out_count = count;
+}
+
+// Helper: Free `count` heap strings and the array that holds them.
+static void scc_rule_parser_free_strings(char** list, uint64_t count) {
+    if (!list) return;
+    for (uint64_t i = 0; i < count; i++) {
+        free(list[i]);
+    }
+    free(list);
+}
+
+// Helper: Deep-copy `count` strings starting at `src` into a new array.
+// Returns NULL if any allocation fails; nothing is leaked in that case.
+static char** scc_rule_parser_clone_strings(char** src, uint64_t count) {
+    char** copy = (char**)malloc((size_t)count * sizeof(char*));
+    if (!copy) return NULL;
+    for (uint64_t i = 0; i < count; i++) {
+        copy[i] = strdup(src[i]);
+        if (!copy[i]) {
+            scc_rule_parser_free_strings(copy, i);
+            return NULL;
+        }
+    }
+    return copy;
+}
+
+// Helper: Apply the tokens of a "=>" line to `matching`.
+//
+//   new_node <target> [ids...]   -> target_syntax_id = <target>, using ids
+//   append_as_child [ids...]     -> target_syntax_id = "append_as_child", using ids
+//   skip                         -> target_syntax_id = "skip", no ids
+//
+// Unknown operations, an empty token list and "new_node" without a target leave
+// the matching untouched. A recognised operation replaces (and frees) any
+// target/id list set by an earlier "=>" line for the same matching.
+// Returns false only when an allocation fails; the matching is unchanged then.
+static bool scc_rule_parser_set_action(scc_matching* matching, char** op_tokens, uint64_t op_token_count) {
+    if (op_token_count == 0) return true;
+
+    const char* target_text = NULL;
+    uint64_t first_id = 0;
+
+    if (strcmp(op_tokens[0], "new_node") == 0) {
+        if (op_token_count < 2) return true;
+        target_text = op_tokens[1];
+        first_id = 2;
+    } else if (strcmp(op_tokens[0], "append_as_child") == 0) {
+        target_text = "append_as_child";
+        first_id = 1;
+    } else if (strcmp(op_tokens[0], "skip") == 0) {
+        target_text = "skip";
+        first_id = op_token_count; // any trailing tokens are ignored
+    } else {
+        return true;
+    }
+
+    // Build the new values first so a failure leaves the matching as it was.
+    char* target = strdup(target_text);
+    if (!target) return false;
+
+    uint64_t id_count = op_token_count - first_id;
+    char** ids = NULL;
+    if (id_count > 0) {
+        ids = scc_rule_parser_clone_strings(op_tokens + first_id, id_count);
+        if (!ids) {
+            free(target);
+            return false;
+        }
+    }
+
+    free(matching->target_syntax_id);
+    scc_rule_parser_free_strings(matching->using_match_id, matching->using_match_id_count);
+    matching->target_syntax_id = target;
+    matching->using_match_id = ids;
+    matching->using_match_id_count = id_count;
+    return true;
+}
+
+// Parse a rule description held in a NUL-terminated string into *output_rule.
+//
+// The text is processed line by line (each line trimmed of surrounding
+// whitespace):
+//   - empty lines and lines starting with '#' or "//" are skipped;
+//   - "syntax_ids:" and "rules:" switch the current section; lines that appear
+//     before either header are ignored;
+//   - in the syntax_ids section every line is stored as one syntax id;
+//   - in the rules section
+//       ";"            ends the current rule,
+//       ": ..." / "| ..." adds a matching (its tokens become match_ids) to the
+//                      current rule and is ignored when there is no current rule,
+//       "=> ..."       sets the action of the most recently added matching,
+//       anything else  starts a new rule named after the trimmed line.
+//
+// `content` is only read. *output_rule is reset at the start of the call and
+// then filled with malloc/realloc/strdup allocations that this function does
+// not release.
+//
+// Returns false if either argument is NULL or if an allocation fails. After an
+// allocation failure *output_rule still describes everything parsed up to that
+// point, with counts matching the arrays, so it can be released like a
+// successful result.
+bool scc_read_rule_from_cstr(char *content, scc_rules *output_rule) {
+    if (!content || !output_rule) return false;
+
+    // Initialize output structure
+    output_rule->rules = NULL;
+    output_rule->rule_count = 0;
+    output_rule->syntax_ids = NULL;
+    output_rule->syntax_id_count = 0;
+
+    typedef enum {
+        STATE_NONE,
+        STATE_SYNTAX_IDS,
+        STATE_RULES
+    } ParserState;
+
+    ParserState state = STATE_NONE;
+    scc_rule* current_rule = NULL;
+
+    char* cursor = content;
+    char line[4096];
+
+    while (get_next_line(&cursor, line, sizeof(line))) {
+        char* trimmed = trim_whitespace(line);
+
+        // Skip comments and empty lines
+        if (trimmed[0] == '\0' || trimmed[0] == '#' || (trimmed[0] == '/' && trimmed[1] == '/')) {
+            continue;
+        }
+
+        // Section switches
+        if (strcmp(trimmed, "syntax_ids:") == 0) {
+            state = STATE_SYNTAX_IDS;
+            continue;
+        }
+        if (strcmp(trimmed, "rules:") == 0) {
+            state = STATE_RULES;
+            continue;
+        }
+
+        if (state == STATE_SYNTAX_IDS) {
+            // Trimmed line is a syntax ID. The count is bumped only after both
+            // allocations succeeded so the structure stays consistent.
+            char* id = strdup(trimmed);
+            if (!id) return false;
+            char** grown_ids = (char**)realloc(output_rule->syntax_ids, (output_rule->syntax_id_count + 1) * sizeof(char*));
+            if (!grown_ids) {
+                free(id);
+                return false;
+            }
+            grown_ids[output_rule->syntax_id_count] = id;
+            output_rule->syntax_ids = grown_ids;
+            output_rule->syntax_id_count++;
+            continue;
+        }
+
+        if (state != STATE_RULES) {
+            continue;
+        }
+
+        if (strcmp(trimmed, ";") == 0) {
+            current_rule = NULL;
+            continue;
+        }
+
+        if (trimmed[0] == ':' || trimmed[0] == '|') {
+            if (!current_rule) {
+                continue;
+            }
+
+            char** match_tokens = NULL;
+            uint64_t match_token_count = 0;
+            split_tokens(trimmed + 1, &match_tokens, &match_token_count);
+
+            scc_matching* matching = (scc_matching*)malloc(sizeof(scc_matching));
+            scc_matching** grown_matchings = NULL;
+            if (matching) {
+                grown_matchings = (scc_matching**)realloc(current_rule->matchings, (current_rule->matching_count + 1) * sizeof(scc_matching*));
+            }
+            if (!grown_matchings) {
+                free(matching);
+                scc_rule_parser_free_strings(match_tokens, match_token_count);
+                return false;
+            }
+
+            matching->match_ids = match_tokens;
+            matching->match_id_count = match_token_count;
+            matching->target_syntax_id = NULL;
+            matching->using_match_id = NULL;
+            matching->using_match_id_count = 0;
+
+            grown_matchings[current_rule->matching_count] = matching;
+            current_rule->matchings = grown_matchings;
+            current_rule->matching_count++;
+        } else if (strncmp(trimmed, "=>", 2) == 0) {
+            if (current_rule && current_rule->matching_count > 0) {
+                scc_matching* matching = current_rule->matchings[current_rule->matching_count - 1];
+                char** op_tokens = NULL;
+                uint64_t op_token_count = 0;
+                split_tokens(trimmed + 2, &op_tokens, &op_token_count);
+
+                bool applied = scc_rule_parser_set_action(matching, op_tokens, op_token_count);
+                scc_rule_parser_free_strings(op_tokens, op_token_count);
+                if (!applied) return false;
+            }
+        } else {
+            // Defines a new rule (node_type_name)
+            char* name = strdup(trimmed);
+            if (!name) return false;
+            scc_rule* grown_rules = (scc_rule*)realloc(output_rule->rules, (output_rule->rule_count + 1) * sizeof(scc_rule));
+            if (!grown_rules) {
+                free(name);
+                return false;
+            }
+            output_rule->rules = grown_rules;
+
+            // Re-derive the pointer: realloc may have moved the array.
+            current_rule = &grown_rules[output_rule->rule_count];
+            current_rule->node_type_name = name;
+            current_rule->matchings = NULL;
+            current_rule->matching_count = 0;
+            output_rule->rule_count++;
+        }
+    }
+
+    return true;
+}
+
+// Read the whole of stream `f` (from its beginning, whatever the current
+// position) into memory and parse it with scc_read_rule_from_cstr().
+//
+// The stream must be seekable: its size is obtained with fseek/ftell. The
+// stream is not closed. Returns false if an argument is NULL, if seeking,
+// sizing, allocating or reading fails, or if parsing reports failure.
+bool scc_read_rule_from_file(FILE *f, scc_rules *output_rule) {
+    if (!f || !output_rule) return false;
+
+    // Determine file size. ftell() reports failure as -1, which must not reach
+    // malloc/fread as a size.
+    if (fseek(f, 0, SEEK_END) != 0) return false;
+    long size = ftell(f);
+    if (size < 0) return false;
+    if (fseek(f, 0, SEEK_SET) != 0) return false;
+
+    char* content = (char*)malloc((size_t)size + 1);
+    if (!content) return false;
+
+    // A short read is normal for text-mode streams (newline translation);
+    // only an actual stream error is treated as failure.
+    size_t read_bytes = fread(content, 1, (size_t)size, f);
+    if (read_bytes < (size_t)size && ferror(f)) {
+        free(content);
+        return false;
+    }
+    content[read_bytes] = '\0';
+
+    bool success = scc_read_rule_from_cstr(content, output_rule);
+    free(content);
+    return success;
+}