Implemented with Antigravity.
This commit is contained in:
@@ -0,0 +1,216 @@
|
||||
#include "../Headers/scc_core.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
// Helper: strip leading and trailing whitespace from a NUL-terminated string.
//
// The string is edited in place: a terminator is written immediately after
// the last non-whitespace character. The returned pointer addresses the first
// non-whitespace character inside the caller's buffer (it is not a new
// allocation). For a string that is empty or all whitespace, the returned
// pointer addresses the terminator and nothing is written.
static char* trim_whitespace(char* str) {
    char* head = str;
    for (; isspace((unsigned char)*head); head++) {
        // advance past leading whitespace
    }
    if (*head == '\0') {
        return head;
    }

    // `tail` is kept one past the last character that will survive.
    char* tail = head + strlen(head);
    while (tail - 1 > head && isspace((unsigned char)tail[-1])) {
        tail--;
    }
    *tail = '\0';
    return head;
}
|
||||
|
||||
// Helper: copy the next line of the buffer at *cursor into line_buf.
//
// Characters are copied until a '\n', a '\r', the end of the buffer, or until
// max_len - 1 characters have been stored; line_buf is then NUL-terminated.
// After the copied text, one optional '\r' followed by one optional '\n' is
// consumed, and *cursor is advanced past everything consumed. A line longer
// than max_len - 1 characters is therefore handed back in pieces over
// successive calls.
//
// Returns false, leaving line_buf and *cursor untouched, when *cursor already
// points at the terminating NUL; returns true otherwise.
static bool get_next_line(char** cursor, char* line_buf, int max_len) {
    char* src = *cursor;
    if (*src == '\0') {
        return false;
    }

    int written = 0;
    for (; written < max_len - 1; written++) {
        const char ch = *src;
        if (ch == '\0' || ch == '\n' || ch == '\r') {
            break;
        }
        line_buf[written] = ch;
        src++;
    }
    line_buf[written] = '\0';

    // Step over the line terminator: "\r\n", a lone "\r" or a lone "\n".
    src += (*src == '\r');
    src += (*src == '\n');

    *cursor = src;
    return true;
}
|
||||
|
||||
// Helper: split a line into whitespace-separated tokens, respecting quotes.
//
// A token that starts with '"' or '\'' runs up to and including the matching
// closing quote (or to the end of the string if the quote is never closed),
// so whitespace inside it does not split the token. The quote characters are
// kept as part of the token text. A quote character that appears in the middle
// of an unquoted token has no special meaning.
//
// On return *out_tokens is a heap array of *out_count heap strings; the caller
// owns both and must free each string and then the array. When the input is
// NULL, empty or all whitespace, *out_tokens is NULL and *out_count is 0.
// If an allocation fails, every token gathered so far is released and the
// same empty result (NULL, 0) is reported.
static void split_tokens(const char* str, char*** out_tokens, uint64_t* out_count) {
    char** tokens = NULL;
    uint64_t count = 0;
    const char* p = str ? str : "";

    while (*p != '\0') {
        while (isspace((unsigned char)*p)) {
            p++;
        }
        if (*p == '\0') {
            break;
        }

        const char* start = p;
        if (*p == '"' || *p == '\'') {
            const char quote = *p++;
            while (*p != '\0' && *p != quote) {
                p++;
            }
            if (*p == quote) {
                p++;
            }
        } else {
            while (*p != '\0' && !isspace((unsigned char)*p)) {
                p++;
            }
        }

        size_t len = (size_t)(p - start);
        char* token = malloc(len + 1);
        // Grow through a temporary so the existing array is not lost when
        // realloc fails.
        char** grown = token ? realloc(tokens, (size_t)(count + 1) * sizeof *tokens) : NULL;
        if (!grown) {
            free(token);
            for (uint64_t i = 0; i < count; i++) {
                free(tokens[i]);
            }
            free(tokens);
            tokens = NULL;
            count = 0;
            break;
        }

        memcpy(token, start, len);
        token[len] = '\0';
        tokens = grown;
        tokens[count++] = token;
    }

    *out_tokens = tokens;
    *out_count = count;
}
|
||||
|
||||
bool scc_read_rule_from_cstr(char *content, scc_rules *output_rule) {
|
||||
if (!content || !output_rule) return false;
|
||||
|
||||
// Initialize output structure
|
||||
output_rule->rules = NULL;
|
||||
output_rule->rule_count = 0;
|
||||
output_rule->syntax_ids = NULL;
|
||||
output_rule->syntax_id_count = 0;
|
||||
|
||||
typedef enum {
|
||||
STATE_NONE,
|
||||
STATE_SYNTAX_IDS,
|
||||
STATE_RULES
|
||||
} ParserState;
|
||||
|
||||
ParserState state = STATE_NONE;
|
||||
scc_rule* current_rule = NULL;
|
||||
|
||||
char* cursor = content;
|
||||
char line[4096];
|
||||
|
||||
while (get_next_line(&cursor, line, sizeof(line))) {
|
||||
char* trimmed = trim_whitespace(line);
|
||||
|
||||
// Skip comments and empty lines
|
||||
if (trimmed[0] == '\0' || trimmed[0] == '#' || (trimmed[0] == '/' && trimmed[1] == '/')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Section switches
|
||||
if (strcmp(trimmed, "syntax_ids:") == 0) {
|
||||
state = STATE_SYNTAX_IDS;
|
||||
continue;
|
||||
} else if (strcmp(trimmed, "rules:") == 0) {
|
||||
state = STATE_RULES;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (state == STATE_SYNTAX_IDS) {
|
||||
// Trimmed line is a syntax ID
|
||||
output_rule->syntax_id_count++;
|
||||
output_rule->syntax_ids = (char**)realloc(output_rule->syntax_ids, output_rule->syntax_id_count * sizeof(char*));
|
||||
output_rule->syntax_ids[output_rule->syntax_id_count - 1] = strdup(trimmed);
|
||||
} else if (state == STATE_RULES) {
|
||||
if (strcmp(trimmed, ";") == 0) {
|
||||
current_rule = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (trimmed[0] == ':' || trimmed[0] == '|') {
|
||||
if (current_rule) {
|
||||
char* match_part = trimmed + 1;
|
||||
char** match_tokens = NULL;
|
||||
uint64_t match_token_count = 0;
|
||||
split_tokens(match_part, &match_tokens, &match_token_count);
|
||||
|
||||
scc_matching* matching = (scc_matching*)malloc(sizeof(scc_matching));
|
||||
matching->match_ids = match_tokens;
|
||||
matching->match_id_count = match_token_count;
|
||||
matching->target_syntax_id = NULL;
|
||||
matching->using_match_id = NULL;
|
||||
matching->using_match_id_count = 0;
|
||||
|
||||
current_rule->matching_count++;
|
||||
current_rule->matchings = (scc_matching**)realloc(current_rule->matchings, current_rule->matching_count * sizeof(scc_matching*));
|
||||
current_rule->matchings[current_rule->matching_count - 1] = matching;
|
||||
}
|
||||
} else if (strncmp(trimmed, "=>", 2) == 0) {
|
||||
if (current_rule && current_rule->matching_count > 0) {
|
||||
scc_matching* matching = current_rule->matchings[current_rule->matching_count - 1];
|
||||
char* op_part = trimmed + 2;
|
||||
char** op_tokens = NULL;
|
||||
uint64_t op_token_count = 0;
|
||||
split_tokens(op_part, &op_tokens, &op_token_count);
|
||||
|
||||
if (op_token_count > 0) {
|
||||
if (strcmp(op_tokens[0], "new_node") == 0) {
|
||||
if (op_token_count > 1) {
|
||||
matching->target_syntax_id = strdup(op_tokens[1]);
|
||||
if (op_token_count > 2) {
|
||||
matching->using_match_id_count = op_token_count - 2;
|
||||
matching->using_match_id = (char**)malloc(matching->using_match_id_count * sizeof(char*));
|
||||
for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
|
||||
matching->using_match_id[i] = strdup(op_tokens[2 + i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (strcmp(op_tokens[0], "append_as_child") == 0) {
|
||||
matching->target_syntax_id = strdup("append_as_child");
|
||||
if (op_token_count > 1) {
|
||||
matching->using_match_id_count = op_token_count - 1;
|
||||
matching->using_match_id = (char**)malloc(matching->using_match_id_count * sizeof(char*));
|
||||
for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
|
||||
matching->using_match_id[i] = strdup(op_tokens[1 + i]);
|
||||
}
|
||||
}
|
||||
} else if (strcmp(op_tokens[0], "skip") == 0) {
|
||||
matching->target_syntax_id = strdup("skip");
|
||||
}
|
||||
}
|
||||
|
||||
// Free op_tokens
|
||||
for (uint64_t i = 0; i < op_token_count; i++) {
|
||||
free(op_tokens[i]);
|
||||
}
|
||||
free(op_tokens);
|
||||
}
|
||||
} else {
|
||||
// Defines a new rule (node_type_name)
|
||||
output_rule->rule_count++;
|
||||
output_rule->rules = (scc_rule*)realloc(output_rule->rules, output_rule->rule_count * sizeof(scc_rule));
|
||||
|
||||
current_rule = &output_rule->rules[output_rule->rule_count - 1];
|
||||
current_rule->node_type_name = strdup(trimmed);
|
||||
current_rule->matchings = NULL;
|
||||
current_rule->matching_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads the whole of stream `f` into memory and parses it with
// scc_read_rule_from_cstr.
//
// The stream is measured by seeking to its end, then rewound and read from
// the start, so it must be seekable; it is not closed here. The temporary
// buffer is freed before returning.
//
// Returns false if f or output_rule is NULL, if seeking or measuring the
// stream fails, if the buffer cannot be allocated, or if a read error occurs;
// in those cases output_rule is not touched. Otherwise returns the result of
// scc_read_rule_from_cstr.
bool scc_read_rule_from_file(FILE *f, scc_rules *output_rule) {
    if (!f || !output_rule) return false;

    // Determine file size. ftell reports failure as -1, which must not reach
    // the size arithmetic below.
    if (fseek(f, 0, SEEK_END) != 0) return false;
    long size = ftell(f);
    if (size < 0) return false;
    if (fseek(f, 0, SEEK_SET) != 0) return false;

    char* content = malloc((size_t)size + 1);
    if (!content) return false;

    // A short read without an error is accepted (e.g. text-mode newline
    // translation can yield fewer bytes than ftell reported).
    size_t read_bytes = fread(content, 1, (size_t)size, f);
    if (read_bytes < (size_t)size && ferror(f)) {
        free(content);
        return false;
    }
    content[read_bytes] = '\0';

    bool success = scc_read_rule_from_cstr(content, output_rule);
    free(content);
    return success;
}
|
||||
Reference in New Issue
Block a user