217 lines
7.8 KiB
C
217 lines
7.8 KiB
C
#include "../Headers/scc_core.h"
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
/**
 * Helper: strip leading and trailing whitespace from a string, in place.
 *
 * Leading whitespace is skipped by advancing the returned pointer; trailing
 * whitespace is removed by writing a NUL terminator into the caller's buffer.
 * No memory is allocated.
 *
 * @param str  Writable NUL-terminated string; may be NULL.
 * @return Pointer into `str` at its first non-whitespace character (or at its
 *         terminator when the string is all whitespace); NULL if `str` is NULL.
 */
static char* trim_whitespace(char* str) {
    if (str == NULL) {
        return NULL;
    }

    /* <ctype.h> classifiers are undefined for negative values, hence the cast. */
    while (isspace((unsigned char)*str)) {
        str++;
    }
    if (*str == '\0') {
        return str;
    }

    /* str[0] is non-whitespace here, so the backwards scan cannot pass it. */
    char* end = str + strlen(str) - 1;
    while (end > str && isspace((unsigned char)*end)) {
        end--;
    }
    end[1] = '\0';

    return str;
}
|
|
|
|
/**
 * Helper: copy the next line of a text buffer into `line_buf`.
 *
 * A line ends at '\n', '\r', "\r\n" or the end of the buffer; the terminator
 * is consumed but not copied. At most `max_len - 1` characters are stored and
 * the result is always NUL-terminated. Characters of an over-long line that
 * do not fit are discarded, so the tail of a long line is never handed back
 * as if it were a separate line.
 *
 * @param cursor    In/out read position; advanced past the consumed line.
 * @param line_buf  Destination buffer of at least `max_len` bytes.
 * @param max_len   Capacity of `line_buf`, including the terminator.
 * @return true if a line (possibly empty) was produced; false at end of
 *         input or when an argument is NULL or `max_len` is not positive.
 */
static bool get_next_line(char** cursor, char* line_buf, int max_len) {
    if (cursor == NULL || *cursor == NULL || line_buf == NULL || max_len <= 0) {
        return false;
    }

    char* c = *cursor;
    if (*c == '\0') {
        return false;
    }

    int idx = 0;
    while (*c != '\0' && *c != '\n' && *c != '\r') {
        /* Keep scanning even when the buffer is full so the cursor always
         * lands on the real end of the line. */
        if (idx < max_len - 1) {
            line_buf[idx++] = *c;
        }
        c++;
    }
    line_buf[idx] = '\0';

    /* Consume one line terminator: "\r\n", a lone '\r' or a lone '\n'. */
    if (*c == '\r') {
        c++;
    }
    if (*c == '\n') {
        c++;
    }

    *cursor = c;
    return true;
}
|
|
|
|
/**
 * Helper: split a line into whitespace-separated tokens, respecting quotes.
 *
 * A token that starts with '"' or '\'' runs up to and including the matching
 * closing quote (or to the end of the string if it is never closed); the
 * quote characters are kept as part of the token. Any other token runs up to
 * the next whitespace character.
 *
 * Ownership: every token and the array itself are heap-allocated; the caller
 * frees each tokens[i] and then the array.
 *
 * @param str         NUL-terminated text to split; NULL is treated as empty.
 * @param out_tokens  Receives the token array, or NULL when there are none.
 * @param out_count   Receives the number of tokens.
 *
 * On allocation failure everything allocated so far is released and the
 * outputs are left as NULL / 0.
 */
static void split_tokens(const char* str, char*** out_tokens, uint64_t* out_count) {
    if (out_tokens == NULL || out_count == NULL) {
        return;
    }
    *out_tokens = NULL;
    *out_count = 0;
    if (str == NULL) {
        return;
    }

    char** tokens = NULL;
    uint64_t count = 0;
    const char* p = str;

    for (;;) {
        while (isspace((unsigned char)*p)) {
            p++;
        }
        if (*p == '\0') {
            break;
        }

        const char* start = p;
        if (*p == '"' || *p == '\'') {
            /* Quoted token: scan to the same quote character that opened it. */
            const char quote = *p++;
            while (*p != '\0' && *p != quote) {
                p++;
            }
            if (*p == quote) {
                p++;
            }
        } else {
            while (*p != '\0' && !isspace((unsigned char)*p)) {
                p++;
            }
        }

        size_t len = (size_t)(p - start);
        char* token = malloc(len + 1);
        /* Grow through a temporary so the existing array survives a failed realloc. */
        char** grown = token ? realloc(tokens, (count + 1) * sizeof *grown) : NULL;
        if (grown == NULL) {
            free(token);
            for (uint64_t i = 0; i < count; i++) {
                free(tokens[i]);
            }
            free(tokens);
            return;
        }

        memcpy(token, start, len);
        token[len] = '\0';
        tokens = grown;
        tokens[count++] = token;
    }

    *out_tokens = tokens;
    *out_count = count;
}
|
|
|
|
bool scc_read_rule_from_cstr(char *content, scc_rules *output_rule) {
|
|
if (!content || !output_rule) return false;
|
|
|
|
// Initialize output structure
|
|
output_rule->rules = NULL;
|
|
output_rule->rule_count = 0;
|
|
output_rule->syntax_ids = NULL;
|
|
output_rule->syntax_id_count = 0;
|
|
|
|
typedef enum {
|
|
STATE_NONE,
|
|
STATE_SYNTAX_IDS,
|
|
STATE_RULES
|
|
} ParserState;
|
|
|
|
ParserState state = STATE_NONE;
|
|
scc_rule* current_rule = NULL;
|
|
|
|
char* cursor = content;
|
|
char line[4096];
|
|
|
|
while (get_next_line(&cursor, line, sizeof(line))) {
|
|
char* trimmed = trim_whitespace(line);
|
|
|
|
// Skip comments and empty lines
|
|
if (trimmed[0] == '\0' || trimmed[0] == '#' || (trimmed[0] == '/' && trimmed[1] == '/')) {
|
|
continue;
|
|
}
|
|
|
|
// Section switches
|
|
if (strcmp(trimmed, "syntax_ids:") == 0) {
|
|
state = STATE_SYNTAX_IDS;
|
|
continue;
|
|
} else if (strcmp(trimmed, "rules:") == 0) {
|
|
state = STATE_RULES;
|
|
continue;
|
|
}
|
|
|
|
if (state == STATE_SYNTAX_IDS) {
|
|
// Trimmed line is a syntax ID
|
|
output_rule->syntax_id_count++;
|
|
output_rule->syntax_ids = (char**)realloc(output_rule->syntax_ids, output_rule->syntax_id_count * sizeof(char*));
|
|
output_rule->syntax_ids[output_rule->syntax_id_count - 1] = strdup(trimmed);
|
|
} else if (state == STATE_RULES) {
|
|
if (strcmp(trimmed, ";") == 0) {
|
|
current_rule = NULL;
|
|
continue;
|
|
}
|
|
|
|
if (trimmed[0] == ':' || trimmed[0] == '|') {
|
|
if (current_rule) {
|
|
char* match_part = trimmed + 1;
|
|
char** match_tokens = NULL;
|
|
uint64_t match_token_count = 0;
|
|
split_tokens(match_part, &match_tokens, &match_token_count);
|
|
|
|
scc_matching* matching = (scc_matching*)malloc(sizeof(scc_matching));
|
|
matching->match_ids = match_tokens;
|
|
matching->match_id_count = match_token_count;
|
|
matching->target_syntax_id = NULL;
|
|
matching->using_match_id = NULL;
|
|
matching->using_match_id_count = 0;
|
|
|
|
current_rule->matching_count++;
|
|
current_rule->matchings = (scc_matching**)realloc(current_rule->matchings, current_rule->matching_count * sizeof(scc_matching*));
|
|
current_rule->matchings[current_rule->matching_count - 1] = matching;
|
|
}
|
|
} else if (strncmp(trimmed, "=>", 2) == 0) {
|
|
if (current_rule && current_rule->matching_count > 0) {
|
|
scc_matching* matching = current_rule->matchings[current_rule->matching_count - 1];
|
|
char* op_part = trimmed + 2;
|
|
char** op_tokens = NULL;
|
|
uint64_t op_token_count = 0;
|
|
split_tokens(op_part, &op_tokens, &op_token_count);
|
|
|
|
if (op_token_count > 0) {
|
|
if (strcmp(op_tokens[0], "new_node") == 0) {
|
|
if (op_token_count > 1) {
|
|
matching->target_syntax_id = strdup(op_tokens[1]);
|
|
if (op_token_count > 2) {
|
|
matching->using_match_id_count = op_token_count - 2;
|
|
matching->using_match_id = (char**)malloc(matching->using_match_id_count * sizeof(char*));
|
|
for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
|
|
matching->using_match_id[i] = strdup(op_tokens[2 + i]);
|
|
}
|
|
}
|
|
}
|
|
} else if (strcmp(op_tokens[0], "append_as_child") == 0) {
|
|
matching->target_syntax_id = strdup("append_as_child");
|
|
if (op_token_count > 1) {
|
|
matching->using_match_id_count = op_token_count - 1;
|
|
matching->using_match_id = (char**)malloc(matching->using_match_id_count * sizeof(char*));
|
|
for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
|
|
matching->using_match_id[i] = strdup(op_tokens[1 + i]);
|
|
}
|
|
}
|
|
} else if (strcmp(op_tokens[0], "skip") == 0) {
|
|
matching->target_syntax_id = strdup("skip");
|
|
}
|
|
}
|
|
|
|
// Free op_tokens
|
|
for (uint64_t i = 0; i < op_token_count; i++) {
|
|
free(op_tokens[i]);
|
|
}
|
|
free(op_tokens);
|
|
}
|
|
} else {
|
|
// Defines a new rule (node_type_name)
|
|
output_rule->rule_count++;
|
|
output_rule->rules = (scc_rule*)realloc(output_rule->rules, output_rule->rule_count * sizeof(scc_rule));
|
|
|
|
current_rule = &output_rule->rules[output_rule->rule_count - 1];
|
|
current_rule->node_type_name = strdup(trimmed);
|
|
current_rule->matchings = NULL;
|
|
current_rule->matching_count = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
 * Read an entire stream into memory and parse it with scc_read_rule_from_cstr().
 *
 * The stream must be seekable: its size is determined with fseek/ftell and
 * the position is rewound to the start before reading. The stream is not
 * closed.
 *
 * @param f            Open, readable stream.
 * @param output_rule  Structure to fill; see scc_read_rule_from_cstr().
 * @return The parser's result, or false if an argument is NULL, the size
 *         cannot be determined, memory cannot be allocated, or reading fails.
 *         On these early failures `output_rule` is not touched.
 */
bool scc_read_rule_from_file(FILE *f, scc_rules *output_rule) {
    if (!f || !output_rule) {
        return false;
    }

    /* Determine file size. ftell() returns -1 on failure (e.g. a pipe);
     * using that as a length would under-allocate and over-read. */
    if (fseek(f, 0, SEEK_END) != 0) {
        return false;
    }
    long size = ftell(f);
    if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        return false;
    }

    char* content = malloc((size_t)size + 1);
    if (!content) {
        return false;
    }

    /* A short read is normal for text-mode streams that translate line
     * endings, so only an actual stream error is treated as failure. */
    size_t read_bytes = fread(content, 1, (size_t)size, f);
    if (read_bytes < (size_t)size && ferror(f)) {
        free(content);
        return false;
    }
    content[read_bytes] = '\0';

    bool success = scc_read_rule_from_cstr(content, output_rule);
    free(content);
    return success;
}
|