Files
scc/Source/scc_parser.c
2026-05-26 19:21:36 +10:00

217 lines
7.8 KiB
C

#include "../Headers/scc_core.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Helper: Strip whitespace from both ends of a string, in place.
// Returns a pointer into `str` at its first non-whitespace character (no new
// allocation is made). Trailing whitespace is removed by writing a terminator
// right after the last non-whitespace character. If the string is empty or
// all whitespace, the returned pointer addresses the existing terminator and
// nothing is written.
static char* trim_whitespace(char* str) {
    char* first = str;
    for (; isspace((unsigned char)*first); first++) {
        // advance past leading whitespace; stops at '\0' at the latest
    }

    size_t length = strlen(first);
    if (length == 0) {
        return first;
    }

    // first[0] is known to be non-whitespace, so the scan never goes below 1
    while (length > 1 && isspace((unsigned char)first[length - 1])) {
        length--;
    }
    first[length] = '\0';
    return first;
}
// Helper: Copy the next line of the text at *cursor into line_buf.
//   cursor   - in/out position inside a NUL-terminated buffer; advanced past
//              the returned line and its line ending
//   line_buf - receives the line, always NUL-terminated, without the newline
//   max_len  - capacity of line_buf; at most max_len - 1 characters are copied
// Returns false once *cursor is at the terminating NUL, true otherwise.
// A line ending is "\r", "\n" or "\r\n". When a line does not fit, copying
// stops early and the remainder is handed out by the following call(s) as if
// it were a separate line.
static bool get_next_line(char** cursor, char* line_buf, int max_len) {
    char* pos = *cursor;
    if (*pos == '\0') {
        return false;
    }

    int written = 0;
    for (;;) {
        const char ch = *pos;
        if (ch == '\0' || ch == '\n' || ch == '\r') {
            break;
        }
        if (written >= max_len - 1) {
            break;  // buffer full: leave the rest for the next call
        }
        line_buf[written++] = ch;
        pos++;
    }
    line_buf[written] = '\0';

    // Consume a single line terminator, if one is present
    if (*pos == '\r') {
        pos++;
    }
    if (*pos == '\n') {
        pos++;
    }

    *cursor = pos;
    return true;
}
// Helper: Split line into tokens, respecting quotes.
//   str        - NUL-terminated text to split (NULL is treated as empty)
//   out_tokens - receives a heap array of heap-allocated token strings, or
//                NULL when there are no tokens; the caller frees each token
//                and then the array
//   out_count  - receives the number of tokens
// Tokens are separated by whitespace. A token that begins with '"' or '\''
// runs up to and including the matching closing quote (or to the end of the
// string if the quote is never closed); the quote characters stay part of the
// token. If an allocation fails, everything built so far is released and the
// outputs are NULL / 0.
static void split_tokens(const char* str, char*** out_tokens, uint64_t* out_count) {
    char** tokens = NULL;
    uint64_t count = 0;

    *out_tokens = NULL;
    *out_count = 0;
    if (!str) {
        return;
    }

    const char* p = str;
    while (*p != '\0') {
        while (isspace((unsigned char)*p)) p++;
        if (*p == '\0') break;

        const char* start = p;
        if (*p == '"' || *p == '\'') {
            // Quoted token: scan to the matching quote of the same kind
            const char quote = *p++;
            while (*p != '\0' && *p != quote) p++;
            if (*p == quote) p++;
        } else {
            while (*p != '\0' && !isspace((unsigned char)*p)) p++;
        }
        size_t len = (size_t)(p - start);

        char* token = (char*)malloc(len + 1);
        if (!token) goto fail;
        memcpy(token, start, len);
        token[len] = '\0';

        // Grow through a temporary so the old array survives a failed realloc
        char** grown = (char**)realloc(tokens, (count + 1) * sizeof(char*));
        if (!grown) {
            free(token);
            goto fail;
        }
        tokens = grown;
        tokens[count++] = token;
    }

    *out_tokens = tokens;
    *out_count = count;
    return;

fail:
    for (uint64_t i = 0; i < count; i++) {
        free(tokens[i]);
    }
    free(tokens);
}
bool scc_read_rule_from_cstr(char *content, scc_rules *output_rule) {
if (!content || !output_rule) return false;
// Initialize output structure
output_rule->rules = NULL;
output_rule->rule_count = 0;
output_rule->syntax_ids = NULL;
output_rule->syntax_id_count = 0;
typedef enum {
STATE_NONE,
STATE_SYNTAX_IDS,
STATE_RULES
} ParserState;
ParserState state = STATE_NONE;
scc_rule* current_rule = NULL;
char* cursor = content;
char line[4096];
while (get_next_line(&cursor, line, sizeof(line))) {
char* trimmed = trim_whitespace(line);
// Skip comments and empty lines
if (trimmed[0] == '\0' || trimmed[0] == '#' || (trimmed[0] == '/' && trimmed[1] == '/')) {
continue;
}
// Section switches
if (strcmp(trimmed, "syntax_ids:") == 0) {
state = STATE_SYNTAX_IDS;
continue;
} else if (strcmp(trimmed, "rules:") == 0) {
state = STATE_RULES;
continue;
}
if (state == STATE_SYNTAX_IDS) {
// Trimmed line is a syntax ID
output_rule->syntax_id_count++;
output_rule->syntax_ids = (char**)realloc(output_rule->syntax_ids, output_rule->syntax_id_count * sizeof(char*));
output_rule->syntax_ids[output_rule->syntax_id_count - 1] = strdup(trimmed);
} else if (state == STATE_RULES) {
if (strcmp(trimmed, ";") == 0) {
current_rule = NULL;
continue;
}
if (trimmed[0] == ':' || trimmed[0] == '|') {
if (current_rule) {
char* match_part = trimmed + 1;
char** match_tokens = NULL;
uint64_t match_token_count = 0;
split_tokens(match_part, &match_tokens, &match_token_count);
scc_matching* matching = (scc_matching*)malloc(sizeof(scc_matching));
matching->match_ids = match_tokens;
matching->match_id_count = match_token_count;
matching->target_syntax_id = NULL;
matching->using_match_id = NULL;
matching->using_match_id_count = 0;
current_rule->matching_count++;
current_rule->matchings = (scc_matching**)realloc(current_rule->matchings, current_rule->matching_count * sizeof(scc_matching*));
current_rule->matchings[current_rule->matching_count - 1] = matching;
}
} else if (strncmp(trimmed, "=>", 2) == 0) {
if (current_rule && current_rule->matching_count > 0) {
scc_matching* matching = current_rule->matchings[current_rule->matching_count - 1];
char* op_part = trimmed + 2;
char** op_tokens = NULL;
uint64_t op_token_count = 0;
split_tokens(op_part, &op_tokens, &op_token_count);
if (op_token_count > 0) {
if (strcmp(op_tokens[0], "new_node") == 0) {
if (op_token_count > 1) {
matching->target_syntax_id = strdup(op_tokens[1]);
if (op_token_count > 2) {
matching->using_match_id_count = op_token_count - 2;
matching->using_match_id = (char**)malloc(matching->using_match_id_count * sizeof(char*));
for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
matching->using_match_id[i] = strdup(op_tokens[2 + i]);
}
}
}
} else if (strcmp(op_tokens[0], "append_as_child") == 0) {
matching->target_syntax_id = strdup("append_as_child");
if (op_token_count > 1) {
matching->using_match_id_count = op_token_count - 1;
matching->using_match_id = (char**)malloc(matching->using_match_id_count * sizeof(char*));
for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
matching->using_match_id[i] = strdup(op_tokens[1 + i]);
}
}
} else if (strcmp(op_tokens[0], "skip") == 0) {
matching->target_syntax_id = strdup("skip");
}
}
// Free op_tokens
for (uint64_t i = 0; i < op_token_count; i++) {
free(op_tokens[i]);
}
free(op_tokens);
}
} else {
// Defines a new rule (node_type_name)
output_rule->rule_count++;
output_rule->rules = (scc_rule*)realloc(output_rule->rules, output_rule->rule_count * sizeof(scc_rule));
current_rule = &output_rule->rules[output_rule->rule_count - 1];
current_rule->node_type_name = strdup(trimmed);
current_rule->matchings = NULL;
current_rule->matching_count = 0;
}
}
}
return true;
}
// Reads the whole of stream `f` (from its beginning, regardless of the
// current position) into memory and parses it with scc_read_rule_from_cstr.
//   f           - open, seekable stream; it is not closed here
//   output_rule - receives the parsed rules
// Returns false if an argument is NULL, the stream cannot be sized or read,
// memory cannot be allocated, or parsing fails. When the failure happens
// before parsing starts, output_rule is not touched.
bool scc_read_rule_from_file(FILE *f, scc_rules *output_rule) {
    if (!f || !output_rule) return false;

    // Determine file size. Each step fails on a non-seekable stream, and an
    // unchecked ftell() result of -1 would otherwise turn into a huge read
    // into a zero-byte buffer.
    if (fseek(f, 0, SEEK_END) != 0) return false;
    long size = ftell(f);
    if (size < 0) return false;
    if (fseek(f, 0, SEEK_SET) != 0) return false;

    char* content = (char*)malloc((size_t)size + 1);
    if (!content) return false;

    // A short read without an error is accepted (e.g. text-mode newline
    // translation can yield fewer bytes than the reported size).
    size_t read_bytes = fread(content, 1, (size_t)size, f);
    if (read_bytes < (size_t)size && ferror(f)) {
        free(content);
        return false;
    }
    content[read_bytes] = '\0';

    bool success = scc_read_rule_from_cstr(content, output_rule);
    free(content);
    return success;
}