#include "../Headers/scc_core.h"

#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Size of the stack buffer used to hold one line of rule text.
enum { LINE_BUF_SIZE = 4096 };

// Helper: Trim leading and trailing whitespace in place.
// Returns a pointer into `str` at the first non-space character; the
// trailing whitespace is cut off by writing a terminator into the buffer.
static char *trim_whitespace(char *str)
{
    while (isspace((unsigned char)*str)) {
        str++;
    }
    if (*str == '\0') {
        return str;
    }

    char *end = str + strlen(str) - 1;
    while (end > str && isspace((unsigned char)*end)) {
        end--;
    }
    end[1] = '\0';
    return str;
}

// Helper: Copy the next line from *cursor into line_buf and advance *cursor.
// Accepts "\n", "\r" and "\r\n" line endings. Returns false once the cursor
// sits on the terminating NUL. A line longer than max_len - 1 characters is
// returned in successive chunks (the remainder is seen as the next line).
static bool get_next_line(char **cursor, char *line_buf, size_t max_len)
{
    // A buffer with no room for at least one character would never advance
    // the cursor, so the caller would loop forever.
    if (max_len < 2) {
        return false;
    }

    char *c = *cursor;
    if (*c == '\0') {
        return false;
    }

    size_t idx = 0;
    while (*c != '\0' && *c != '\n' && *c != '\r' && idx < max_len - 1) {
        line_buf[idx++] = *c++;
    }
    line_buf[idx] = '\0';

    // Skip newline characters
    if (*c == '\r') {
        c++;
    }
    if (*c == '\n') {
        c++;
    }

    *cursor = c;
    return true;
}

// Helper: Resize `arr` to hold `new_count` elements of `elem_size` bytes.
// Returns the new array, or NULL on overflow/allocation failure, in which
// case `arr` is still valid and unchanged.
static void *grow_array(void *arr, uint64_t new_count, size_t elem_size)
{
    if (new_count > SIZE_MAX / elem_size) {
        return NULL;
    }
    return realloc(arr, (size_t)new_count * elem_size);
}

// Helper: Free `count` heap strings and the array that holds them.
static void free_string_array(char **items, uint64_t count)
{
    for (uint64_t i = 0; i < count; i++) {
        free(items[i]);
    }
    free(items);
}

// Helper: Deep-copy `count` (> 0) strings into a newly allocated array.
// Returns NULL on allocation failure; nothing is leaked in that case.
static char **dup_string_array(char *const *src, uint64_t count)
{
    if (count > SIZE_MAX / sizeof(char *)) {
        return NULL;
    }

    char **copy = malloc((size_t)count * sizeof *copy);
    if (!copy) {
        return NULL;
    }

    for (uint64_t i = 0; i < count; i++) {
        copy[i] = strdup(src[i]);
        if (!copy[i]) {
            free_string_array(copy, i);
            return NULL;
        }
    }
    return copy;
}

// Helper: Split line into whitespace-separated tokens, respecting quotes.
// A token that starts with " or ' runs up to and including the matching
// closing quote (or to end of string if unterminated); the quote characters
// are kept as part of the token. On success *out_tokens / *out_count receive
// a heap array of heap strings (NULL / 0 for an empty line). On allocation
// failure everything is released, the outputs are set to NULL / 0 and false
// is returned.
static bool split_tokens(const char *str, char ***out_tokens, uint64_t *out_count)
{
    char **tokens = NULL;
    uint64_t count = 0;
    const char *p = str;

    *out_tokens = NULL;
    *out_count = 0;

    while (*p != '\0') {
        while (isspace((unsigned char)*p)) {
            p++;
        }
        if (*p == '\0') {
            break;
        }

        const char *start = p;
        if (*p == '"' || *p == '\'') {
            const char quote = *p;
            p++;
            while (*p != '\0' && *p != quote) {
                p++;
            }
            if (*p == quote) {
                p++;
            }
        } else {
            while (*p != '\0' && !isspace((unsigned char)*p)) {
                p++;
            }
        }

        size_t len = (size_t)(p - start);
        char *token = malloc(len + 1);
        if (!token) {
            free_string_array(tokens, count);
            return false;
        }
        memcpy(token, start, len);
        token[len] = '\0';

        char **grown = grow_array(tokens, count + 1, sizeof *grown);
        if (!grown) {
            free(token);
            free_string_array(tokens, count);
            return false;
        }
        tokens = grown;
        tokens[count++] = token;
    }

    *out_tokens = tokens;
    *out_count = count;
    return true;
}

// Helper: Append a copy of `id` to out->syntax_ids.
// The count is only bumped once the new entry is fully stored.
static bool add_syntax_id(scc_rules *out, const char *id)
{
    char *id_copy = strdup(id);
    if (!id_copy) {
        return false;
    }

    char **grown = grow_array(out->syntax_ids,
                              (uint64_t)out->syntax_id_count + 1,
                              sizeof *grown);
    if (!grown) {
        free(id_copy);
        return false;
    }

    grown[out->syntax_id_count] = id_copy;
    out->syntax_ids = grown;
    out->syntax_id_count++;
    return true;
}

// Helper: Append a new, empty rule named `name` to out->rules.
// Returns the new rule, or NULL on allocation failure. The returned pointer
// is invalidated by the next call, because the rules array may move.
static scc_rule *add_rule(scc_rules *out, const char *name)
{
    char *name_copy = strdup(name);
    if (!name_copy) {
        return NULL;
    }

    scc_rule *grown = grow_array(out->rules,
                                 (uint64_t)out->rule_count + 1,
                                 sizeof *grown);
    if (!grown) {
        free(name_copy);
        return NULL;
    }
    out->rules = grown;

    scc_rule *rule = &grown[out->rule_count];
    rule->node_type_name = name_copy;
    rule->matchings = NULL;
    rule->matching_count = 0;
    out->rule_count++;
    return rule;
}

// Helper: Append a matching to `rule`, whose match ids are the tokens of
// `pattern` (the text after the leading ':' or '|'). The matching starts
// with no target and no "using" ids.
static bool add_matching(scc_rule *rule, const char *pattern)
{
    char **ids = NULL;
    uint64_t id_count = 0;
    if (!split_tokens(pattern, &ids, &id_count)) {
        return false;
    }

    scc_matching *matching = malloc(sizeof *matching);
    if (!matching) {
        free_string_array(ids, id_count);
        return false;
    }

    scc_matching **grown = grow_array(rule->matchings,
                                      (uint64_t)rule->matching_count + 1,
                                      sizeof *grown);
    if (!grown) {
        free(matching);
        free_string_array(ids, id_count);
        return false;
    }

    matching->match_ids = ids;
    matching->match_id_count = id_count;
    matching->target_syntax_id = NULL;
    matching->using_match_id = NULL;
    matching->using_match_id_count = 0;

    grown[rule->matching_count] = matching;
    rule->matchings = grown;
    rule->matching_count++;
    return true;
}

// Helper: Store `target` (copied) as the matching's target_syntax_id and,
// when arg_count > 0, replace its "using" ids with copies of `args`.
// With arg_count == 0 the existing "using" ids are left untouched.
// Previously stored values that get replaced are freed. Copies are made
// before anything is modified, so on failure the matching is unchanged.
static bool set_action(scc_matching *matching, const char *target,
                       char *const *args, uint64_t arg_count)
{
    char *target_copy = strdup(target);
    if (!target_copy) {
        return false;
    }

    char **args_copy = NULL;
    if (arg_count > 0) {
        args_copy = dup_string_array(args, arg_count);
        if (!args_copy) {
            free(target_copy);
            return false;
        }
    }

    free(matching->target_syntax_id);
    matching->target_syntax_id = target_copy;

    if (arg_count > 0) {
        free_string_array(matching->using_match_id,
                          matching->using_match_id_count);
        matching->using_match_id = args_copy;
        matching->using_match_id_count = arg_count;
    }
    return true;
}

// Helper: Parse the text after "=>" and apply it to `matching`.
//   new_node <id> [using...]    -> target = <id>, using = remaining tokens
//   append_as_child [using...]  -> target = "append_as_child"
//   skip                        -> target = "skip"
// "new_node" without an id and unknown operations are ignored.
// Returns false only on allocation failure.
static bool apply_action(scc_matching *matching, const char *action_text)
{
    char **tokens = NULL;
    uint64_t token_count = 0;
    if (!split_tokens(action_text, &tokens, &token_count)) {
        return false;
    }

    bool ok = true;
    if (token_count > 0) {
        const char *op = tokens[0];
        if (strcmp(op, "new_node") == 0) {
            if (token_count > 1) {
                ok = set_action(matching, tokens[1], tokens + 2, token_count - 2);
            }
        } else if (strcmp(op, "append_as_child") == 0) {
            ok = set_action(matching, "append_as_child", tokens + 1, token_count - 1);
        } else if (strcmp(op, "skip") == 0) {
            ok = set_action(matching, "skip", NULL, 0);
        }
    }

    free_string_array(tokens, token_count);
    return ok;
}

// Parse rule text from a NUL-terminated string into *output_rule.
//
// Format, line by line (surrounding whitespace is ignored):
//   - empty lines and lines starting with '#' or "//" are skipped;
//   - "syntax_ids:" and "rules:" switch the current section;
//   - in the syntax_ids section every line is stored as one syntax id;
//   - in the rules section:
//       ";"            closes the current rule,
//       ": ..."/"| ..." adds a matching to the current rule,
//       "=> ..."       sets the action of the current rule's last matching,
//       anything else  starts a new rule named by the whole line;
//   - lines before any section header are ignored.
//
// `content` is only read. *output_rule is reset at entry and filled with
// heap-allocated data. Returns false if an argument is NULL or an allocation
// fails; after an allocation failure *output_rule holds what was parsed so
// far, with every count matching its array.
bool scc_read_rule_from_cstr(char *content, scc_rules *output_rule)
{
    if (!content || !output_rule) {
        return false;
    }

    // Initialize output structure
    output_rule->rules = NULL;
    output_rule->rule_count = 0;
    output_rule->syntax_ids = NULL;
    output_rule->syntax_id_count = 0;

    enum { SECTION_NONE, SECTION_SYNTAX_IDS, SECTION_RULES } section = SECTION_NONE;
    scc_rule *current_rule = NULL;
    char *cursor = content;
    char line[LINE_BUF_SIZE];

    while (get_next_line(&cursor, line, sizeof line)) {
        char *text = trim_whitespace(line);

        // Skip comments and empty lines
        if (text[0] == '\0' || text[0] == '#' ||
            (text[0] == '/' && text[1] == '/')) {
            continue;
        }

        // Section switches
        if (strcmp(text, "syntax_ids:") == 0) {
            section = SECTION_SYNTAX_IDS;
            continue;
        }
        if (strcmp(text, "rules:") == 0) {
            section = SECTION_RULES;
            continue;
        }

        if (section == SECTION_SYNTAX_IDS) {
            if (!add_syntax_id(output_rule, text)) {
                return false;
            }
            continue;
        }
        if (section != SECTION_RULES) {
            continue;
        }

        if (strcmp(text, ";") == 0) {
            current_rule = NULL;
            continue;
        }

        if (text[0] == ':' || text[0] == '|') {
            // Alternatives outside of a rule are silently ignored.
            if (current_rule && !add_matching(current_rule, text + 1)) {
                return false;
            }
        } else if (strncmp(text, "=>", 2) == 0) {
            // An action needs a preceding matching to attach to.
            if (current_rule && current_rule->matching_count > 0) {
                scc_matching *last =
                    current_rule->matchings[current_rule->matching_count - 1];
                if (!apply_action(last, text + 2)) {
                    return false;
                }
            }
        } else {
            // Defines a new rule (node_type_name)
            current_rule = add_rule(output_rule, text);
            if (!current_rule) {
                return false;
            }
        }
    }

    return true;
}

// Read the whole of `f` into memory and parse it with
// scc_read_rule_from_cstr(). The stream must be seekable; it is rewound to
// the start before reading. Returns false if an argument is NULL, the size
// cannot be determined, the buffer cannot be allocated, a read error occurs,
// or parsing fails. The stream is not closed.
bool scc_read_rule_from_file(FILE *f, scc_rules *output_rule)
{
    if (!f || !output_rule) {
        return false;
    }

    // Determine file size
    if (fseek(f, 0, SEEK_END) != 0) {
        return false;
    }
    long size = ftell(f);
    if (size < 0) {
        return false;
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        return false;
    }

    char *content = malloc((size_t)size + 1);
    if (!content) {
        return false;
    }

    // fread may legitimately return fewer bytes than `size` (e.g. newline
    // translation in text mode), so only a stream error counts as failure.
    size_t read_bytes = fread(content, 1, (size_t)size, f);
    if (read_bytes < (size_t)size && ferror(f)) {
        free(content);
        return false;
    }
    content[read_bytes] = '\0';

    bool success = scc_read_rule_from_cstr(content, output_rule);
    free(content);
    return success;
}