Implemented with Antigravity.

This commit is contained in:
Creeper Lv
2026-05-26 19:21:36 +10:00
parent 873ac50dee
commit 2d90caa285
6 changed files with 1283 additions and 5 deletions
+1
View File
@@ -20,6 +20,7 @@ typedef struct scc_options
char *slex_class_name;
char *slex_prefix;
char *slex_data_type_name;
char *slex_header;
} scc_options;
typedef struct scc_matching
+473 -2
View File
@@ -1,6 +1,477 @@
#include "../../../Headers/scc_core.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
bool scc_translate_to_file_c(scc_options *options, scc_rules *rules, FILE *output_file){
//Stub for now.
/*
 * Tells whether `name` is the node_type_name of one of the rules in `rules`.
 * The comparison is an exact strcmp; scanning stops at the first hit.
 * Returns true on a match, false when no rule carries that name.
 */
static bool is_rule_name(scc_rules* rules, const char* name) {
    bool found = false;
    uint64_t idx = 0;
    while (!found && idx < rules->rule_count) {
        found = (strcmp(rules->rules[idx].node_type_name, name) == 0);
        idx++;
    }
    return found;
}
/*
 * Tells whether `name` appears in the rules' syntax_ids table.
 * Entries are compared with strcmp; returns true on the first equal entry
 * and false when the table holds no such string.
 */
static bool is_syntax_id(scc_rules* rules, const char* name) {
    bool found = false;
    uint64_t idx = 0;
    while (!found && idx < rules->syntax_id_count) {
        found = (strcmp(rules->syntax_ids[idx], name) == 0);
        idx++;
    }
    return found;
}
/*
 * Writes `str` to `f` as a double-quoted C string literal.
 *
 * Escaping rules:
 *   - `"` and `\` are backslash-escaped;
 *   - newline, carriage return and tab become \n, \r and \t;
 *   - any other control byte (< 0x20, or 0x7f) becomes a three-digit octal
 *     escape, which cannot absorb a following digit the way \x escapes can;
 *   - every other byte (including bytes >= 0x80) is copied unchanged.
 *
 * A NULL `str` is written as the empty literal "" instead of being dereferenced.
 */
static void print_safe_c_string(FILE* f, const char* str) {
    fputc('"', f);
    if (str != NULL) {
        for (size_t i = 0; str[i] != '\0'; i++) {
            /* Work on the unsigned value so bytes >= 0x80 never compare as negative. */
            unsigned char ch = (unsigned char)str[i];
            switch (ch) {
            case '"':
                fputs("\\\"", f);
                break;
            case '\\':
                fputs("\\\\", f);
                break;
            case '\n':
                fputs("\\n", f);
                break;
            case '\r':
                fputs("\\r", f);
                break;
            case '\t':
                fputs("\\t", f);
                break;
            default:
                if (ch < 0x20 || ch == 0x7f) {
                    /* A raw control byte would break or corrupt the emitted literal. */
                    fprintf(f, "\\%03o", (unsigned)ch);
                } else {
                    fputc(ch, f);
                }
                break;
            }
        }
    }
    fputc('"', f);
}
/*
 * Emits the declarations shared by the generated header and implementation
 * to `f`: the standard includes, a forward declaration/typedef of the slex
 * segment type, the syntax-id and node-type enums, the syntax node and
 * enclosure structs, the rule table structs, and the prototypes of the
 * public parse/free functions.
 *
 * Names come from `options`; a NULL prefix, data_type_name or
 * slex_data_type_name falls back to "scc_", "scc_syntax_node" and
 * "slex_segment" respectively. One enum member is written per entry of
 * rules->syntax_ids.
 */
static void generate_declarations(FILE* f, scc_options* options, scc_rules* rules) {
    char* pfx = "scc_";
    char* node_t = "scc_syntax_node";
    char* seg_t = "slex_segment";
    if (options->prefix) {
        pfx = options->prefix;
    }
    if (options->data_type_name) {
        node_t = options->data_type_name;
    }
    if (options->slex_data_type_name) {
        seg_t = options->slex_data_type_name;
    }

    fputs("#include <stdint.h>\n", f);
    fputs("#include <stdbool.h>\n", f);
    fputs("#include <stdio.h>\n\n", f);

    /* Forward declaration of the lexer segment type. */
    fprintf(f, "struct %s;\n", seg_t);
    fprintf(f, "typedef struct %s %s;\n\n", seg_t, seg_t);

    /* Syntax id enum: a default member followed by one member per syntax id. */
    fprintf(f, "typedef enum %ssyntax_id {\n", pfx);
    fprintf(f, " %sid_default = 0,\n", pfx);
    uint64_t id_idx = 0;
    while (id_idx < rules->syntax_id_count) {
        fprintf(f, " %sid_%s,\n", pfx, rules->syntax_ids[id_idx]);
        id_idx++;
    }
    fprintf(f, "} %ssyntax_id;\n\n", pfx);

    /* Enclosure kind enum. */
    fprintf(f, "typedef enum %ssyntax_node_type {\n", pfx);
    fprintf(f, " %snode,\n", pfx);
    fprintf(f, " %ssegment\n", pfx);
    fprintf(f, "} %ssyntax_node_type;\n\n", pfx);

    /* The node struct refers to the enclosure struct, so declare it first. */
    fprintf(f, "struct %ssyntax_node_enclosure;\n\n", pfx);

    /* Syntax node struct. */
    fprintf(f, "typedef struct %s {\n", node_t);
    fprintf(f, " enum %ssyntax_id id;\n", pfx);
    fputs(" char* syntax_name;\n", f);
    fprintf(f, " struct %ssyntax_node_enclosure* children;\n", pfx);
    fputs(" uint64_t child_count;\n", f);
    fprintf(f, " struct %s* parent;\n", node_t);
    fprintf(f, "} %s;\n\n", node_t);

    /* Enclosure struct: a type tag plus an untyped payload pointer. */
    fprintf(f, "typedef struct %ssyntax_node_enclosure {\n", pfx);
    fprintf(f, " enum %ssyntax_node_type type;\n", pfx);
    fputs(" void* data;\n", f);
    fprintf(f, "} %ssyntax_node_enclosure;\n\n", pfx);

    /* Table types describing a single matching and a whole rule. */
    fprintf(f, "typedef struct %smatching_rule_def {\n", pfx);
    fputs(" const char* target_syntax_id;\n", f);
    fputs(" const char** match_ids;\n", f);
    fputs(" uint64_t match_id_count;\n", f);
    fputs(" const char** using_match_ids;\n", f);
    fputs(" uint64_t using_match_id_count;\n", f);
    fprintf(f, "} %smatching_rule_def;\n\n", pfx);

    fprintf(f, "typedef struct %srule_def {\n", pfx);
    fputs(" const char* node_type_name;\n", f);
    fprintf(f, " const struct %smatching_rule_def* matchings;\n", pfx);
    fputs(" uint64_t matching_count;\n", f);
    fprintf(f, "} %srule_def;\n\n", pfx);

    /* Public entry points of the generated parser. */
    fprintf(f, "char %sparse(%s* head, %s** output);\n", pfx, seg_t, node_t);
    fprintf(f, "char %sfree(%s* root);\n\n", pfx, node_t);
}
/*
 * Collects the distinct terminal identifiers used by the grammar: every
 * match id that is neither a quoted literal ("..." or '...') nor the name of
 * a rule. Entries keep first-seen order and alias the strings owned by
 * `rules`; only the array itself is allocated.
 *
 * On success stores the array (NULL when empty) and its length in the out
 * parameters and returns true; the caller frees the array with free().
 * Returns false on allocation failure, leaving nothing allocated.
 */
static bool collect_terminals(scc_rules *rules, char ***out_terminals, uint64_t *out_count) {
    char **terminals = NULL;
    uint64_t terminal_count = 0;
    for (uint64_t r = 0; r < rules->rule_count; r++) {
        scc_rule *rule = &rules->rules[r];
        for (uint64_t m = 0; m < rule->matching_count; m++) {
            scc_matching *matching = rule->matchings[m];
            for (uint64_t i = 0; i < matching->match_id_count; i++) {
                char *match_id = matching->match_ids[i];
                size_t len = strlen(match_id);
                bool is_literal = len >= 2 && ((match_id[0] == '"' && match_id[len-1] == '"') || (match_id[0] == '\'' && match_id[len-1] == '\''));
                if (is_literal || is_rule_name(rules, match_id)) {
                    continue;
                }
                bool exists = false;
                for (uint64_t j = 0; j < terminal_count; j++) {
                    if (strcmp(terminals[j], match_id) == 0) {
                        exists = true;
                        break;
                    }
                }
                if (exists) {
                    continue;
                }
                /* Grow through a temporary so the old array survives a failed realloc. */
                char **grown = realloc(terminals, (terminal_count + 1) * sizeof *grown);
                if (!grown) {
                    free(terminals);
                    return false;
                }
                terminals = grown;
                terminals[terminal_count++] = match_id;
            }
        }
    }
    *out_terminals = terminals;
    *out_count = terminal_count;
    return true;
}

/*
 * Generates a C parser for `rules` and writes its implementation to
 * `output_file`.
 *
 * When options->header_output is a non-empty path, the shared declarations
 * are written to that file (wrapped in an include guard) and the
 * implementation #includes it by base name; otherwise the declarations are
 * emitted inline at the top of the implementation. The slex header named by
 * options->slex_header is included, defaulting to "slex_generated.h".
 *
 * The emitted code contains: match_element, get_syntax_id, segment and tree
 * clone/free helpers, the static rule tables, a recursive parse_step that
 * tries every reduction and then a shift (giving up past depth 2000), and the
 * public <prefix>parse / <prefix>free functions. The first rule's
 * node_type_name is the one accepted as the final result.
 *
 * Returns false when an argument is NULL, when `rules` has no rules (the
 * generated code needs rule 0), on allocation failure, when the header file
 * cannot be opened or written, or when `output_file` reports a write error.
 * Returns true otherwise. All checks that can fail before output starts are
 * performed before anything is written to `output_file`.
 *
 * NOTE(review): the generated parse_step appears to leave intermediate
 * clones unfreed on the success path - confirm whether that is intended.
 */
bool scc_translate_to_file_c(scc_options *options, scc_rules *rules, FILE *output_file) {
    if (!options || !rules || !output_file) return false;
    /* parse_step below names rules[0] as the start symbol; without it the read is out of bounds. */
    if (rules->rule_count == 0) return false;

    const char *prefix = options->prefix ? options->prefix : "scc_";
    const char *data_type = options->data_type_name ? options->data_type_name : "scc_syntax_node";
    const char *slex_prefix = options->slex_prefix ? options->slex_prefix : "slex_";
    const char *slex_data_type = options->slex_data_type_name ? options->slex_data_type_name : "slex_segment";

    // Gather the unique slex tags/ids used as terminals (non-rule, non-quoted match_ids).
    // Done first so an allocation failure cannot leave a half-written output.
    char **terminals = NULL;
    uint64_t terminal_count = 0;
    if (!collect_terminals(rules, &terminals, &terminal_count)) return false;

    // 1. Write header file if requested
    char *header_base = NULL;
    if (options->header_output && options->header_output[0] != '\0') {
        /* Base name = text after the last '/' or '\\', including a separator at index 0. */
        const char *base = options->header_output;
        for (const char *p = options->header_output; *p != '\0'; p++) {
            if (*p == '/' || *p == '\\') base = p + 1;
        }
        header_base = strdup(base);
        if (!header_base) {
            free(terminals);
            return false;
        }
        FILE *header_f = fopen(options->header_output, "w");
        if (!header_f) {
            free(header_base);
            free(terminals);
            return false;
        }
        fprintf(header_f, "#ifndef __SCC_GENERATED_H__\n");
        fprintf(header_f, "#define __SCC_GENERATED_H__\n\n");
        generate_declarations(header_f, options, rules);
        fprintf(header_f, "#endif\n");
        bool header_ok = ferror(header_f) == 0;
        /* fclose flushes, so a failure here also means the header is incomplete. */
        if (fclose(header_f) != 0) header_ok = false;
        if (!header_ok) {
            free(header_base);
            free(terminals);
            return false;
        }
    }

    // 2. Write implementation
    fprintf(output_file, "#include <stdio.h>\n");
    fprintf(output_file, "#include <stdlib.h>\n");
    fprintf(output_file, "#include <string.h>\n");
    fprintf(output_file, "#include <stdbool.h>\n");
    if (options->slex_header && strlen(options->slex_header) > 0) {
        fprintf(output_file, "#include \"%s\"\n\n", options->slex_header);
    } else {
        fprintf(output_file, "#include \"slex_generated.h\"\n\n");
    }
    if (header_base) {
        fprintf(output_file, "#include \"%s\"\n\n", header_base);
        free(header_base);
        header_base = NULL;
    } else {
        generate_declarations(output_file, options, rules);
    }

    // Helper functions for matching
    fprintf(output_file, "static bool match_element(const %ssyntax_node_enclosure* enc, const char* pattern) {\n", prefix);
    fprintf(output_file, " if (!enc || !pattern) return false;\n");
    fprintf(output_file, " if (enc->type == %snode) {\n", prefix);
    fprintf(output_file, " %s* node = (%s*)enc->data;\n", data_type, data_type);
    fprintf(output_file, " return strcmp(node->syntax_name, pattern) == 0;\n");
    fprintf(output_file, " } else {\n");
    fprintf(output_file, " %s* segment = (%s*)enc->data;\n", slex_data_type, slex_data_type);
    fprintf(output_file, " size_t len = strlen(pattern);\n");
    fprintf(output_file, " if (len >= 2 && ((pattern[0] == '\"' && pattern[len-1] == '\"') || (pattern[0] == '\\'' && pattern[len-1] == '\\''))) {\n");
    fprintf(output_file, " return (segment->length == (int64_t)(len - 2)) && (strncmp(segment->head, pattern + 1, len - 2) == 0);\n");
    fprintf(output_file, " }\n");
    for (uint64_t i = 0; i < terminal_count; i++) {
        fprintf(output_file, " if (strcmp(pattern, \"%s\") == 0) {\n", terminals[i]);
        fprintf(output_file, " return segment->tag == %stag_%s || segment->id == %sid_%s;\n", slex_prefix, terminals[i], slex_prefix, terminals[i]);
        fprintf(output_file, " }\n");
    }
    fprintf(output_file, " return false;\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, "}\n\n");
    free(terminals);
    terminals = NULL;

    // get_syntax_id function
    fprintf(output_file, "static enum %ssyntax_id get_syntax_id(const char* name) {\n", prefix);
    for (uint64_t i = 0; i < rules->syntax_id_count; i++) {
        fprintf(output_file, " if (strcmp(name, \"%s\") == 0) return %sid_%s;\n", rules->syntax_ids[i], prefix, rules->syntax_ids[i]);
    }
    fprintf(output_file, " return %sid_default;\n", prefix);
    fprintf(output_file, "}\n\n");

    // clone_segment function
    fprintf(output_file, "static %s* clone_segment(const %s* src) {\n", slex_data_type, slex_data_type);
    fprintf(output_file, " if (!src) return NULL;\n");
    fprintf(output_file, " %s* dst = (%s*)malloc(sizeof(%s));\n", slex_data_type, slex_data_type, slex_data_type);
    fprintf(output_file, " dst->head = src->head ? strdup(src->head) : NULL;\n");
    fprintf(output_file, " dst->length = src->length;\n");
    fprintf(output_file, " dst->file_name = src->file_name ? strdup(src->file_name) : NULL;\n");
    fprintf(output_file, " dst->line = src->line;\n");
    fprintf(output_file, " dst->col = src->col;\n");
    fprintf(output_file, " dst->tag = src->tag;\n");
    fprintf(output_file, " dst->id = src->id;\n");
    fprintf(output_file, " dst->prev = NULL;\n");
    fprintf(output_file, " dst->next = NULL;\n");
    fprintf(output_file, " return dst;\n");
    fprintf(output_file, "}\n\n");

    // free_segment function
    fprintf(output_file, "static void free_segment(%s* seg) {\n", slex_data_type);
    fprintf(output_file, " if (!seg) return;\n");
    fprintf(output_file, " if (seg->head) free(seg->head);\n");
    fprintf(output_file, " if (seg->file_name) free(seg->file_name);\n");
    fprintf(output_file, " free(seg);\n");
    fprintf(output_file, "}\n\n");

    // clone_tree and free_tree functions forward declaration
    fprintf(output_file, "static %s* clone_tree(const %s* src);\n", data_type, data_type);
    fprintf(output_file, "static void free_tree(%s* node);\n\n", data_type);

    // clone_tree implementation
    fprintf(output_file, "static %s* clone_tree(const %s* src) {\n", data_type, data_type);
    fprintf(output_file, " if (!src) return NULL;\n");
    fprintf(output_file, " %s* dst = (%s*)malloc(sizeof(%s));\n", data_type, data_type, data_type);
    fprintf(output_file, " dst->id = src->id;\n");
    fprintf(output_file, " dst->syntax_name = src->syntax_name ? strdup(src->syntax_name) : NULL;\n");
    fprintf(output_file, " dst->child_count = src->child_count;\n");
    fprintf(output_file, " if (dst->child_count > 0) {\n");
    fprintf(output_file, " dst->children = (%ssyntax_node_enclosure*)malloc(dst->child_count * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix);
    fprintf(output_file, " for (uint64_t i = 0; i < dst->child_count; i++) {\n");
    fprintf(output_file, " dst->children[i].type = src->children[i].type;\n");
    fprintf(output_file, " if (src->children[i].type == %snode) {\n", prefix);
    fprintf(output_file, " dst->children[i].data = clone_tree((%s*)src->children[i].data);\n", data_type);
    fprintf(output_file, " ((%s*)dst->children[i].data)->parent = dst;\n", data_type);
    fprintf(output_file, " } else {\n");
    fprintf(output_file, " dst->children[i].data = clone_segment((%s*)src->children[i].data);\n", slex_data_type);
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " } else {\n");
    fprintf(output_file, " dst->children = NULL;\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " dst->parent = NULL;\n");
    fprintf(output_file, " return dst;\n");
    fprintf(output_file, "}\n\n");

    // free_tree implementation
    fprintf(output_file, "static void free_tree(%s* node) {\n", data_type);
    fprintf(output_file, " if (!node) return;\n");
    fprintf(output_file, " if (node->syntax_name) free(node->syntax_name);\n");
    fprintf(output_file, " for (uint64_t i = 0; i < node->child_count; i++) {\n");
    fprintf(output_file, " if (node->children[i].type == %snode) {\n", prefix);
    fprintf(output_file, " free_tree((%s*)node->children[i].data);\n", data_type);
    fprintf(output_file, " } else {\n");
    fprintf(output_file, " free_segment((%s*)node->children[i].data);\n", slex_data_type);
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " if (node->children) free(node->children);\n");
    fprintf(output_file, " free(node);\n");
    fprintf(output_file, "}\n\n");

    // Generate matching rules tables
    for (uint64_t r = 0; r < rules->rule_count; r++) {
        scc_rule *rule = &rules->rules[r];
        for (uint64_t m = 0; m < rule->matching_count; m++) {
            scc_matching *matching = rule->matchings[m];
            fprintf(output_file, "static const char* match_ids_%d_%d[] = {", (int)r, (int)m);
            /* `{}` is not a valid initializer before C23; emit one NULL slot for an empty list. */
            if (matching->match_id_count == 0) fprintf(output_file, "NULL");
            for (uint64_t i = 0; i < matching->match_id_count; i++) {
                if (i > 0) fprintf(output_file, ", ");
                print_safe_c_string(output_file, matching->match_ids[i]);
            }
            fprintf(output_file, "};\n");
            fprintf(output_file, "static const char* using_match_ids_%d_%d[] = {", (int)r, (int)m);
            if (matching->using_match_id_count == 0) fprintf(output_file, "NULL");
            for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
                if (i > 0) fprintf(output_file, ", ");
                print_safe_c_string(output_file, matching->using_match_id[i]);
            }
            fprintf(output_file, "};\n");
        }
        if (rule->matching_count > 0) {
            fprintf(output_file, "static const %smatching_rule_def matchings_%d[] = {\n", prefix, (int)r);
            for (uint64_t m = 0; m < rule->matching_count; m++) {
                scc_matching *matching = rule->matchings[m];
                fprintf(output_file, " { ");
                print_safe_c_string(output_file, matching->target_syntax_id ? matching->target_syntax_id : "skip");
                fprintf(output_file, ", match_ids_%d_%d, %d, using_match_ids_%d_%d, %d }",
                        (int)r, (int)m, (int)matching->match_id_count,
                        (int)r, (int)m, (int)matching->using_match_id_count);
                if (m < rule->matching_count - 1) fprintf(output_file, ",\n");
            }
            fprintf(output_file, "\n};\n");
        }
    }

    // RULES array
    fprintf(output_file, "static const %srule_def RULES[] = {\n", prefix);
    for (uint64_t r = 0; r < rules->rule_count; r++) {
        scc_rule *rule = &rules->rules[r];
        if (rule->matching_count > 0) {
            fprintf(output_file, " { ");
            print_safe_c_string(output_file, rule->node_type_name);
            fprintf(output_file, ", matchings_%d, %d }", (int)r, (int)rule->matching_count);
        } else {
            fprintf(output_file, " { ");
            print_safe_c_string(output_file, rule->node_type_name);
            fprintf(output_file, ", NULL, 0 }");
        }
        if (r < rules->rule_count - 1) fprintf(output_file, ",\n");
    }
    fprintf(output_file, "\n};\n\n");

    // parse_step function
    fprintf(output_file, "static bool parse_step(%ssyntax_node_enclosure* stack, uint64_t stack_len, %s* input, %s** result, int depth) {\n", prefix, slex_data_type, data_type);
    fprintf(output_file, " if (depth > 2000) return false;\n\n");
    fprintf(output_file, " if (input == NULL) {\n");
    fprintf(output_file, " if (stack_len == 1 && stack[0].type == %snode) {\n", prefix);
    fprintf(output_file, " %s* node = (%s*)stack[0].data;\n", data_type, data_type);
    fprintf(output_file, " if (strcmp(node->syntax_name, \"%s\") == 0) {\n", rules->rules[0].node_type_name);
    fprintf(output_file, " *result = node;\n");
    fprintf(output_file, " return true;\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n\n");

    // Try reductions
    fprintf(output_file, " for (uint64_t r = 0; r < %d; r++) {\n", (int)rules->rule_count);
    fprintf(output_file, " const %srule_def* rule = &RULES[r];\n", prefix);
    fprintf(output_file, " for (uint64_t m = 0; m < rule->matching_count; m++) {\n");
    fprintf(output_file, " const %smatching_rule_def* match = &rule->matchings[m];\n", prefix);
    fprintf(output_file, " if (stack_len >= match->match_id_count) {\n");
    fprintf(output_file, " bool match_ok = true;\n");
    fprintf(output_file, " uint64_t stack_offset = stack_len - match->match_id_count;\n");
    fprintf(output_file, " for (uint64_t i = 0; i < match->match_id_count; i++) {\n");
    fprintf(output_file, " if (!match_element(&stack[stack_offset + i], match->match_ids[i])) {\n");
    fprintf(output_file, " match_ok = false;\n");
    fprintf(output_file, " break;\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " if (match_ok) {\n");
    fprintf(output_file, " uint64_t new_stack_len = stack_offset;\n");
    fprintf(output_file, " bool skip_push = false;\n");
    fprintf(output_file, " %ssyntax_node_enclosure reduced_enc;\n", prefix);
    fprintf(output_file, " reduced_enc.type = %snode;\n", prefix);
    fprintf(output_file, " reduced_enc.data = NULL;\n\n");
    fprintf(output_file, " if (strcmp(match->target_syntax_id, \"skip\") == 0) {\n");
    fprintf(output_file, " skip_push = true;\n");
    fprintf(output_file, " } else if (strcmp(match->target_syntax_id, \"append_as_child\") == 0) {\n");
    fprintf(output_file, " if (match->match_id_count >= 1 && stack[stack_offset].type == %snode) {\n", prefix);
    fprintf(output_file, " %s* parent_node = (%s*)stack[stack_offset].data;\n", data_type, data_type);
    fprintf(output_file, " %s* cloned_parent = clone_tree(parent_node);\n", data_type);
    fprintf(output_file, " if (match->using_match_id_count >= 1) {\n");
    fprintf(output_file, " int child_idx = atoi(match->using_match_ids[0] + 1);\n");
    fprintf(output_file, " %ssyntax_node_enclosure child_enc = stack[stack_offset + child_idx];\n", prefix);
    fprintf(output_file, " if (child_enc.type == %snode) {\n", prefix);
    fprintf(output_file, " child_enc.data = clone_tree((%s*)child_enc.data);\n", data_type);
    fprintf(output_file, " } else {\n");
    fprintf(output_file, " child_enc.data = clone_segment((%s*)child_enc.data);\n", slex_data_type);
    fprintf(output_file, " }\n");
    fprintf(output_file, " cloned_parent->child_count++;\n");
    fprintf(output_file, " cloned_parent->children = (%ssyntax_node_enclosure*)realloc(cloned_parent->children, cloned_parent->child_count * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix);
    fprintf(output_file, " cloned_parent->children[cloned_parent->child_count - 1] = child_enc;\n");
    fprintf(output_file, " if (child_enc.type == %snode) {\n", prefix);
    fprintf(output_file, " ((%s*)child_enc.data)->parent = cloned_parent;\n", data_type);
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " reduced_enc.data = cloned_parent;\n");
    fprintf(output_file, " new_stack_len++;\n");
    fprintf(output_file, " } else {\n");
    fprintf(output_file, " match_ok = false;\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " } else {\n");
    fprintf(output_file, " %s* new_node = (%s*)malloc(sizeof(%s));\n", data_type, data_type, data_type);
    fprintf(output_file, " new_node->id = get_syntax_id(match->target_syntax_id);\n");
    fprintf(output_file, " new_node->syntax_name = strdup(rule->node_type_name);\n");
    fprintf(output_file, " new_node->child_count = match->using_match_id_count;\n");
    fprintf(output_file, " new_node->children = new_node->child_count > 0 ? (%ssyntax_node_enclosure*)malloc(new_node->child_count * sizeof(%ssyntax_node_enclosure)) : NULL;\n", prefix, prefix);
    fprintf(output_file, " new_node->parent = NULL;\n");
    fprintf(output_file, " for (uint64_t i = 0; i < match->using_match_id_count; i++) {\n");
    fprintf(output_file, " int idx = atoi(match->using_match_ids[i] + 1);\n");
    fprintf(output_file, " %ssyntax_node_enclosure child_enc = stack[stack_offset + idx];\n", prefix);
    fprintf(output_file, " if (child_enc.type == %snode) {\n", prefix);
    fprintf(output_file, " child_enc.data = clone_tree((%s*)child_enc.data);\n", data_type);
    fprintf(output_file, " } else {\n");
    fprintf(output_file, " child_enc.data = clone_segment((%s*)child_enc.data);\n", slex_data_type);
    fprintf(output_file, " }\n");
    fprintf(output_file, " new_node->children[i] = child_enc;\n");
    fprintf(output_file, " if (child_enc.type == %snode) {\n", prefix);
    fprintf(output_file, " ((%s*)child_enc.data)->parent = new_node;\n", data_type);
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " reduced_enc.data = new_node;\n");
    fprintf(output_file, " new_stack_len++;\n");
    fprintf(output_file, " }\n\n");
    fprintf(output_file, " if (match_ok) {\n");
    fprintf(output_file, " %ssyntax_node_enclosure* new_stack = (%ssyntax_node_enclosure*)malloc((new_stack_len + 1) * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix, prefix);
    fprintf(output_file, " for (uint64_t i = 0; i < stack_offset; i++) {\n");
    fprintf(output_file, " new_stack[i] = stack[i];\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " if (!skip_push) {\n");
    fprintf(output_file, " new_stack[new_stack_len - 1] = reduced_enc;\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " if (parse_step(new_stack, new_stack_len, input, result, depth + 1)) {\n");
    fprintf(output_file, " free(new_stack);\n");
    fprintf(output_file, " return true;\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " if (!skip_push && reduced_enc.data) {\n");
    fprintf(output_file, " free_tree((%s*)reduced_enc.data);\n", data_type);
    fprintf(output_file, " }\n");
    fprintf(output_file, " free(new_stack);\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " }\n\n");

    // Try shift
    fprintf(output_file, " if (input != NULL) {\n");
    fprintf(output_file, " uint64_t new_stack_len = stack_len + 1;\n");
    fprintf(output_file, " %ssyntax_node_enclosure* new_stack = (%ssyntax_node_enclosure*)malloc(new_stack_len * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix, prefix);
    fprintf(output_file, " for (uint64_t i = 0; i < stack_len; i++) {\n");
    fprintf(output_file, " new_stack[i] = stack[i];\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " new_stack[stack_len].type = %ssegment;\n", prefix);
    fprintf(output_file, " new_stack[stack_len].data = clone_segment(input);\n");
    fprintf(output_file, " if (parse_step(new_stack, new_stack_len, input->next, result, depth + 1)) {\n");
    fprintf(output_file, " free(new_stack);\n");
    fprintf(output_file, " return true;\n");
    fprintf(output_file, " }\n");
    fprintf(output_file, " free_segment((%s*)new_stack[stack_len].data);\n", slex_data_type);
    fprintf(output_file, " free(new_stack);\n");
    fprintf(output_file, " }\n\n");
    fprintf(output_file, " return false;\n");
    fprintf(output_file, "}\n\n");

    // Implement public functions scc_parse and scc_free
    fprintf(output_file, "char %sparse(%s* head, %s** output) {\n", prefix, slex_data_type, data_type);
    fprintf(output_file, " if (!head || !output) return 0;\n");
    fprintf(output_file, " *output = NULL;\n");
    fprintf(output_file, " %ssyntax_node_enclosure* stack = NULL;\n", prefix);
    fprintf(output_file, " bool success = parse_step(stack, 0, head, output, 0);\n");
    fprintf(output_file, " return success ? 1 : 0;\n");
    fprintf(output_file, "}\n\n");
    fprintf(output_file, "char %sfree(%s* root) {\n", prefix, data_type);
    fprintf(output_file, " if (!root) return 0;\n");
    fprintf(output_file, " free_tree(root);\n");
    fprintf(output_file, " return 1;\n");
    fprintf(output_file, "}\n");

    /* Report a failed write instead of claiming success on a truncated file. */
    return ferror(output_file) == 0;
}
+384 -2
View File
@@ -1,6 +1,388 @@
#include "../../../Headers/scc_core.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
bool scc_translate_to_file_csharp(scc_options *options, scc_rules *rules, FILE *output_file){
//Stub for now.
/*
 * Checks whether any rule in `rules` has a node_type_name equal to `name`
 * (exact strcmp match). Returns true for the first rule that matches and
 * false when the whole rule list has been scanned without a match.
 */
static bool is_rule_name(scc_rules* rules, const char* name) {
    uint64_t pos = 0;
    while (pos < rules->rule_count) {
        const char* candidate = rules->rules[pos].node_type_name;
        if (strcmp(candidate, name) == 0) {
            return true;
        }
        pos++;
    }
    return false;
}
/*
 * Writes `str` to `f` as a double-quoted string literal for the generated
 * C# source.
 *
 * Escaping rules (all valid in a regular C# string literal):
 *   - `"` and `\` are backslash-escaped;
 *   - newline, carriage return and tab become \n, \r and \t;
 *   - any other control byte (< 0x20, or 0x7f) becomes a \uXXXX escape,
 *     because a raw line break or control byte would break the literal;
 *   - every other byte (including bytes >= 0x80) is copied unchanged.
 *
 * A NULL `str` is written as the empty literal "" instead of being dereferenced.
 */
static void print_safe_c_string(FILE* f, const char* str) {
    fputc('"', f);
    if (str != NULL) {
        for (size_t i = 0; str[i] != '\0'; i++) {
            /* Work on the unsigned value so bytes >= 0x80 never compare as negative. */
            unsigned char ch = (unsigned char)str[i];
            switch (ch) {
            case '"':
                fputs("\\\"", f);
                break;
            case '\\':
                fputs("\\\\", f);
                break;
            case '\n':
                fputs("\\n", f);
                break;
            case '\r':
                fputs("\\r", f);
                break;
            case '\t':
                fputs("\\t", f);
                break;
            default:
                if (ch < 0x20 || ch == 0x7f) {
                    fprintf(f, "\\u%04X", (unsigned)ch);
                } else {
                    fputc(ch, f);
                }
                break;
            }
        }
    }
    fputc('"', f);
}
bool scc_translate_to_file_csharp(scc_options *options, scc_rules *rules, FILE *output_file) {
if (!options || !rules || !output_file) return false;
char* ns_name = (options->namespace_name && strlen(options->namespace_name) > 0) ? options->namespace_name : "SCCGenerated";
char* class_name = (options->class_name && strlen(options->class_name) > 0) ? options->class_name : "SCC";
char* data_type = (options->data_type_name && strlen(options->data_type_name) > 0) ? options->data_type_name : "SyntaxNode";
char* prefix = options->prefix ? options->prefix : "";
char* slex_ns = (options->slex_namespace_name && strlen(options->slex_namespace_name) > 0) ? options->slex_namespace_name : "SLexGenerated";
char* slex_class = (options->slex_class_name && strlen(options->slex_class_name) > 0) ? options->slex_class_name : "SLex";
char* slex_prefix = options->slex_prefix ? options->slex_prefix : "";
char* slex_data_type = (options->slex_data_type_name && strlen(options->slex_data_type_name) > 0) ? options->slex_data_type_name : "Segment";
// 1. Gather all unique terminal tags/ids used in the rules
char** terminals = NULL;
uint64_t terminal_count = 0;
for (uint64_t r = 0; r < rules->rule_count; r++) {
scc_rule* rule = &rules->rules[r];
for (uint64_t m = 0; m < rule->matching_count; m++) {
scc_matching* matching = rule->matchings[m];
for (uint64_t i = 0; i < matching->match_id_count; i++) {
char* match_id = matching->match_ids[i];
size_t len = strlen(match_id);
bool is_literal = len >= 2 && ((match_id[0] == '"' && match_id[len-1] == '"') || (match_id[0] == '\'' && match_id[len-1] == '\''));
if (!is_literal && !is_rule_name(rules, match_id)) {
// Check if already in list
bool exists = false;
for (uint64_t j = 0; j < terminal_count; j++) {
if (strcmp(terminals[j], match_id) == 0) {
exists = true;
break;
}
}
if (!exists) {
terminal_count++;
terminals = (char**)realloc(terminals, terminal_count * sizeof(char*));
terminals[terminal_count - 1] = match_id;
}
}
}
}
}
// Write imports
fprintf(output_file, "using System;\n");
fprintf(output_file, "using System.Collections.Generic;\n");
fprintf(output_file, "using %s;\n\n", slex_ns);
// Open Namespace
fprintf(output_file, "namespace %s\n{\n", ns_name);
// SyntaxId enum
fprintf(output_file, " public enum %sSyntaxId\n {\n", prefix);
fprintf(output_file, " Default = 0,\n");
for (uint64_t i = 0; i < rules->syntax_id_count; i++) {
fprintf(output_file, " %s,\n", rules->syntax_ids[i]);
}
fprintf(output_file, " }\n\n");
// EnclosureType enum
fprintf(output_file, " public enum %sEnclosureType\n {\n", prefix);
fprintf(output_file, " Node,\n");
fprintf(output_file, " Segment\n");
fprintf(output_file, " }\n\n");
// SyntaxNodeEnclosure class
fprintf(output_file, " public class %sSyntaxNodeEnclosure\n {\n", prefix);
fprintf(output_file, " public %sEnclosureType EnclosureType;\n", prefix);
fprintf(output_file, " public %s? Node;\n", data_type);
fprintf(output_file, " public %s? Segment;\n\n", slex_data_type);
fprintf(output_file, " public %sSyntaxNodeEnclosure Clone()\n {\n", prefix);
fprintf(output_file, " return new %sSyntaxNodeEnclosure\n {\n", prefix);
fprintf(output_file, " EnclosureType = this.EnclosureType,\n");
fprintf(output_file, " Node = this.Node?.Clone(),\n");
fprintf(output_file, " Segment = this.Segment != null ? CloneSegment(this.Segment) : null\n");
fprintf(output_file, " };\n");
fprintf(output_file, " }\n\n");
fprintf(output_file, " private static %s CloneSegment(%s src)\n {\n", slex_data_type, slex_data_type);
fprintf(output_file, " return new %s\n {\n", slex_data_type);
fprintf(output_file, " Content = src.Content,\n");
fprintf(output_file, " FileName = src.FileName,\n");
fprintf(output_file, " Line = src.Line,\n");
fprintf(output_file, " Column = src.Column,\n");
fprintf(output_file, " Tag = src.Tag,\n");
fprintf(output_file, " Id = src.Id\n");
fprintf(output_file, " };\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n\n");
// SyntaxNode class
fprintf(output_file, " public class %s\n {\n", data_type);
fprintf(output_file, " public %s? Parent;\n", data_type);
fprintf(output_file, " public string SyntaxName = string.Empty;\n");
fprintf(output_file, " public %sSyntaxId Id;\n", prefix);
fprintf(output_file, " public List<%sSyntaxNodeEnclosure> Children = new List<%sSyntaxNodeEnclosure>();\n\n", prefix, prefix);
fprintf(output_file, " public %s Clone()\n {\n", data_type);
fprintf(output_file, " var dst = new %s\n {\n", data_type);
fprintf(output_file, " SyntaxName = this.SyntaxName,\n");
fprintf(output_file, " Id = this.Id\n");
fprintf(output_file, " };\n");
fprintf(output_file, " foreach (var child in this.Children)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " var clonedChild = child.Clone();\n");
fprintf(output_file, " dst.Children.Add(clonedChild);\n");
fprintf(output_file, " if (clonedChild.EnclosureType == %sEnclosureType.Node && clonedChild.Node != null)\n", prefix);
fprintf(output_file, " {\n");
fprintf(output_file, " clonedChild.Node.Parent = dst;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n");
fprintf(output_file, " return dst;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n\n");
// SCC class
fprintf(output_file, " public class %s\n {\n", class_name);
// Inner Rule structures for representation
fprintf(output_file, " private class MatchingRuleDef\n {\n");
fprintf(output_file, " public string TargetSyntaxId = string.Empty;\n");
fprintf(output_file, " public string[] MatchIds = new string[0];\n");
fprintf(output_file, " public string[] UsingMatchIds = new string[0];\n");
fprintf(output_file, " }\n\n");
fprintf(output_file, " private class RuleDef\n {\n");
fprintf(output_file, " public string NodeTypeTypeName = string.Empty;\n");
fprintf(output_file, " public MatchingRuleDef[] Matchings = new MatchingRuleDef[0];\n");
fprintf(output_file, " }\n\n");
// RULES definition
fprintf(output_file, " private static readonly RuleDef[] RULES = new RuleDef[]\n {\n");
for (uint64_t r = 0; r < rules->rule_count; r++) {
scc_rule* rule = &rules->rules[r];
fprintf(output_file, " new RuleDef\n {\n");
fprintf(output_file, " NodeTypeTypeName = ");
print_safe_c_string(output_file, rule->node_type_name);
fprintf(output_file, ",\n");
fprintf(output_file, " Matchings = new MatchingRuleDef[]\n {\n");
for (uint64_t m = 0; m < rule->matching_count; m++) {
scc_matching* matching = rule->matchings[m];
fprintf(output_file, " new MatchingRuleDef\n {\n");
fprintf(output_file, " TargetSyntaxId = ");
print_safe_c_string(output_file, matching->target_syntax_id ? matching->target_syntax_id : "skip");
fprintf(output_file, ",\n");
fprintf(output_file, " MatchIds = new string[] { ");
for (uint64_t i = 0; i < matching->match_id_count; i++) {
print_safe_c_string(output_file, matching->match_ids[i]);
if (i < matching->match_id_count - 1) fprintf(output_file, ", ");
}
fprintf(output_file, " },\n");
fprintf(output_file, " UsingMatchIds = new string[] { ");
for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
print_safe_c_string(output_file, matching->using_match_id[i]);
if (i < matching->using_match_id_count - 1) fprintf(output_file, ", ");
}
fprintf(output_file, " }\n");
fprintf(output_file, " }");
if (m < rule->matching_count - 1) fprintf(output_file, ",\n");
else fprintf(output_file, "\n");
}
fprintf(output_file, " }\n");
fprintf(output_file, " }");
if (r < rules->rule_count - 1) fprintf(output_file, ",\n");
else fprintf(output_file, "\n");
}
fprintf(output_file, " };\n\n");
// MatchElement helper
fprintf(output_file, " private static bool MatchElement(%sSyntaxNodeEnclosure enc, string pattern)\n {\n", prefix);
fprintf(output_file, " if (enc == null || pattern == null) return false;\n");
fprintf(output_file, " if (enc.EnclosureType == %sEnclosureType.Node)\n", prefix);
fprintf(output_file, " {\n");
fprintf(output_file, " return enc.Node != null && enc.Node.SyntaxName == pattern;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " else\n");
fprintf(output_file, " {\n");
fprintf(output_file, " if (enc.Segment == null) return false;\n");
fprintf(output_file, " if (pattern.Length >= 2 && ((pattern[0] == '\"' && pattern[pattern.Length - 1] == '\"') || (pattern[0] == '\\'' && pattern[pattern.Length - 1] == '\\'')))\n");
fprintf(output_file, " {\n");
fprintf(output_file, " string stripped = pattern.Substring(1, pattern.Length - 2);\n");
fprintf(output_file, " return enc.Segment.Content == stripped;\n");
fprintf(output_file, " }\n");
for (uint64_t i = 0; i < terminal_count; i++) {
fprintf(output_file, " if (pattern == \"%s\")\n", terminals[i]);
fprintf(output_file, " {\n");
fprintf(output_file, " return enc.Segment.Tag == %sTag.%s || enc.Segment.Id == %sId.%s;\n", slex_data_type, terminals[i], slex_data_type, terminals[i]);
fprintf(output_file, " }\n");
}
fprintf(output_file, " return false;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n\n");
if (terminals) free(terminals);
// GetSyntaxId helper
fprintf(output_file, " private static %sSyntaxId GetSyntaxId(string name)\n {\n", prefix);
fprintf(output_file, " if (Enum.TryParse<%sSyntaxId>(name, out var id))\n", prefix);
fprintf(output_file, " {\n");
fprintf(output_file, " return id;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " return %sSyntaxId.Default;\n", prefix);
fprintf(output_file, " }\n\n");
// Public Parse method
fprintf(output_file, " public bool Parse(%s head, out %s? root)\n {\n", slex_data_type, data_type);
fprintf(output_file, " root = null;\n");
fprintf(output_file, " if (head == null) return false;\n");
fprintf(output_file, " var stack = new List<%sSyntaxNodeEnclosure>();\n", prefix);
fprintf(output_file, " return ParseStep(stack, head, out root, 0);\n");
fprintf(output_file, " }\n\n");
// ParseStep recursive method
fprintf(output_file, " private static bool ParseStep(List<%sSyntaxNodeEnclosure> stack, %s? input, out %s? result, int depth)\n {\n", prefix, slex_data_type, data_type);
fprintf(output_file, " result = null;\n");
fprintf(output_file, " if (depth > 2000) return false;\n\n");
fprintf(output_file, " if (input == null)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " if (stack.Count == 1 && stack[0].EnclosureType == %sEnclosureType.Node)\n", prefix);
fprintf(output_file, " {\n");
fprintf(output_file, " var node = stack[0].Node;\n");
fprintf(output_file, " if (node != null && node.SyntaxName == RULES[0].NodeTypeTypeName)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " result = node;\n");
fprintf(output_file, " return true;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n\n");
// Try Reductions
fprintf(output_file, " for (int r = 0; r < RULES.Length; r++)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " var rule = RULES[r];\n");
fprintf(output_file, " for (int m = 0; m < rule.Matchings.Length; m++)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " var match = rule.Matchings[m];\n");
fprintf(output_file, " if (stack.Count >= match.MatchIds.Length)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " bool matchOk = true;\n");
fprintf(output_file, " int stackOffset = stack.Count - match.MatchIds.Length;\n");
fprintf(output_file, " for (int i = 0; i < match.MatchIds.Length; i++)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " if (!MatchElement(stack[stackOffset + i], match.MatchIds[i]))\n");
fprintf(output_file, " {\n");
fprintf(output_file, " matchOk = false;\n");
fprintf(output_file, " break;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n\n");
fprintf(output_file, " if (matchOk)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " var newStack = new List<%sSyntaxNodeEnclosure>();\n", prefix);
fprintf(output_file, " for (int i = 0; i < stackOffset; i++)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " newStack.Add(stack[i]);\n");
fprintf(output_file, " }\n\n");
fprintf(output_file, " bool skipPush = false;\n");
fprintf(output_file, " %sSyntaxNodeEnclosure? reducedEnc = null;\n\n", prefix);
fprintf(output_file, " if (match.TargetSyntaxId == \"skip\")\n");
fprintf(output_file, " {\n");
fprintf(output_file, " skipPush = true;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " else if (match.TargetSyntaxId == \"append_as_child\")\n");
fprintf(output_file, " {\n");
fprintf(output_file, " if (match.MatchIds.Length >= 1 && stack[stackOffset].EnclosureType == %sEnclosureType.Node && stack[stackOffset].Node != null)\n", prefix);
fprintf(output_file, " {\n");
fprintf(output_file, " var parentNode = stack[stackOffset].Node;\n");
fprintf(output_file, " var clonedParent = parentNode.Clone();\n\n");
fprintf(output_file, " if (match.UsingMatchIds.Length >= 1)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " int childIdx = int.Parse(match.UsingMatchIds[0].Substring(1));\n");
fprintf(output_file, " var childEnc = stack[stackOffset + childIdx].Clone();\n");
fprintf(output_file, " clonedParent.Children.Add(childEnc);\n");
fprintf(output_file, " if (childEnc.EnclosureType == %sEnclosureType.Node && childEnc.Node != null)\n", prefix);
fprintf(output_file, " {\n");
fprintf(output_file, " childEnc.Node.Parent = clonedParent;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n\n");
fprintf(output_file, " reducedEnc = new %sSyntaxNodeEnclosure\n", prefix);
fprintf(output_file, " {\n");
fprintf(output_file, " EnclosureType = %sEnclosureType.Node,\n", prefix);
fprintf(output_file, " Node = clonedParent\n");
fprintf(output_file, " };\n");
fprintf(output_file, " }\n");
fprintf(output_file, " else\n");
fprintf(output_file, " {\n");
fprintf(output_file, " matchOk = false;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n");
fprintf(output_file, " else\n");
fprintf(output_file, " {\n");
fprintf(output_file, " var newNode = new %s\n", data_type);
fprintf(output_file, " {\n");
fprintf(output_file, " Id = GetSyntaxId(match.TargetSyntaxId),\n");
fprintf(output_file, " SyntaxName = rule.NodeTypeTypeName\n");
fprintf(output_file, " };\n\n");
fprintf(output_file, " for (int i = 0; i < match.UsingMatchIds.Length; i++)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " int idx = int.Parse(match.UsingMatchIds[i].Substring(1));\n");
fprintf(output_file, " var childEnc = stack[stackOffset + idx].Clone();\n");
fprintf(output_file, " newNode.Children.Add(childEnc);\n");
fprintf(output_file, " if (childEnc.EnclosureType == %sEnclosureType.Node && childEnc.Node != null)\n", prefix);
fprintf(output_file, " {\n");
fprintf(output_file, " childEnc.Node.Parent = newNode;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n\n");
fprintf(output_file, " reducedEnc = new %sSyntaxNodeEnclosure\n", prefix);
fprintf(output_file, " {\n");
fprintf(output_file, " EnclosureType = %sEnclosureType.Node,\n", prefix);
fprintf(output_file, " Node = newNode\n");
fprintf(output_file, " };\n");
fprintf(output_file, " }\n\n");
fprintf(output_file, " if (matchOk)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " if (!skipPush && reducedEnc != null)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " newStack.Add(reducedEnc);\n");
fprintf(output_file, " }\n");
fprintf(output_file, " if (ParseStep(newStack, input, out result, depth + 1))\n");
fprintf(output_file, " {\n");
fprintf(output_file, " return true;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n\n");
// Try Shift
fprintf(output_file, " if (input != null)\n");
fprintf(output_file, " {\n");
fprintf(output_file, " var newStack = new List<%sSyntaxNodeEnclosure>(stack);\n", prefix);
fprintf(output_file, " newStack.Add(new %sSyntaxNodeEnclosure\n", prefix);
fprintf(output_file, " {\n");
fprintf(output_file, " EnclosureType = %sEnclosureType.Segment,\n", prefix);
fprintf(output_file, " Segment = input\n");
fprintf(output_file, " });\n\n");
fprintf(output_file, " if (ParseStep(newStack, input.Next, out result, depth + 1))\n");
fprintf(output_file, " {\n");
fprintf(output_file, " return true;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n\n");
fprintf(output_file, " return false;\n");
fprintf(output_file, " }\n");
fprintf(output_file, " }\n");
// Close Namespace
fprintf(output_file, "}\n");
return true;
}
+203
View File
@@ -1,7 +1,210 @@
#include "../../Headers/scc_core.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Writes the command-line help text (banner, usage line, option list) to stdout.
static void print_usage() {
    static const char *const usage_lines[] = {
        "SCC (Simple Compiler-Compiler) - Version 1.0\n",
        "Translates grammar rule files to target language parser source code.\n\n",
        "Usage: scc [options] <input_file>\n\n",
        "Options:\n",
        "  -o <output>             Output file\n",
        "  -l <language>           Target language: c, c#, csharp (default: c)\n",
        "  -h <header>             Output header file (C only)\n",
        "  -ns <namespace>         Specify namespace (C# only, default: SCCGenerated)\n",
        "  -class <class_name>     Specify class name (C# only, default: SCC)\n",
        "  -prefix <prefix>        Specify prefix for functions/types (default: scc_ for C, empty for C#)\n",
        "  -data_type <name>       Specify syntax node data type name\n",
        "  -slex-ns <ns>           Specify namespace used in slex (default: SLexGenerated)\n",
        "  -slex-class <class>     Specify class name used in slex (default: SLex)\n",
        "  -slex-prefix <prefix>   Specify function prefix used in slex (default: slex_)\n",
        "  -slex-data_type <type>  Specify segment data type used in slex (default: Segment)\n",
        "  -slex-h <header>        Specify header file slex generated (C only, default: slex_generated.h)\n",
    };
    const size_t line_total = sizeof usage_lines / sizeof usage_lines[0];

    // None of the lines contain conversion specifiers, so fputs emits them verbatim.
    for (size_t line = 0; line < line_total; line++) {
        fputs(usage_lines[line], stdout);
    }
}
// Returns the value that follows option av[*index] and advances *index past it.
// When the option is the last argument, prints the "Missing value" error for
// that option to stderr and returns NULL.
static char *take_option_value(int ac, char **av, int *index)
{
    if (*index + 1 >= ac) {
        fprintf(stderr, "Error: Missing value for %s option\n", av[*index]);
        return NULL;
    }
    *index += 1;
    return av[*index];
}

// Entry point of the scc command-line tool.
//
// Parses the command line into an scc_options record, fills in per-language
// defaults, reads the grammar rules from the input file, and writes the
// generated parser to the output file.
//
// Returns 0 on success and 1 on any error (bad arguments, unreadable input,
// unwritable output, rule parsing failure, translation failure). When
// translation or the final flush fails, the partially written output file is
// removed.
int main(int ac, char **av)
{
    char *input_file = NULL;
    char *output_file = NULL;

    // Zero-initialise so that every field, including any not assigned below,
    // starts out as 0/NULL.
    scc_options options = {0};
    options.target_language = c_language;

    for (int i = 1; i < ac; i++) {
        const char *arg = av[i];
        char *value = NULL;

        if (strcmp(arg, "-o") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            output_file = value;
        } else if (strcmp(arg, "-l") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            if (strcmp(value, "c") == 0) {
                options.target_language = c_language;
            } else if (strcmp(value, "c#") == 0 || strcmp(value, "csharp") == 0) {
                options.target_language = csharp;
            } else {
                fprintf(stderr, "Error: Unsupported language '%s'\n", value);
                return 1;
            }
        } else if (strcmp(arg, "-h") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            options.header_output = value;
        } else if (strcmp(arg, "-ns") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            options.namespace_name = value;
        } else if (strcmp(arg, "-class") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            options.class_name = value;
        } else if (strcmp(arg, "-prefix") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            options.prefix = value;
        } else if (strcmp(arg, "-data_type") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            options.data_type_name = value;
        } else if (strcmp(arg, "-slex-ns") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            options.slex_namespace_name = value;
        } else if (strcmp(arg, "-slex-class") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            options.slex_class_name = value;
        } else if (strcmp(arg, "-slex-prefix") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            options.slex_prefix = value;
        } else if (strcmp(arg, "-slex-data_type") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            options.slex_data_type_name = value;
        } else if (strcmp(arg, "-slex-h") == 0) {
            if ((value = take_option_value(ac, av, &i)) == NULL) return 1;
            options.slex_header = value;
        } else if (arg[0] == '-') {
            fprintf(stderr, "Error: Unknown option '%s'\n", arg);
            print_usage();
            return 1;
        } else if (input_file == NULL) {
            // First positional argument is the grammar file.
            input_file = av[i];
        } else {
            fprintf(stderr, "Error: Multiple input files specified ('%s' and '%s')\n", input_file, av[i]);
            return 1;
        }
    }

    if (!input_file) {
        fprintf(stderr, "Error: Missing input file\n");
        print_usage();
        return 1;
    }
    if (!output_file) {
        fprintf(stderr, "Error: Missing output file (-o <output>)\n");
        print_usage();
        return 1;
    }

    // Set C/C# defaults if not provided
    if (options.target_language == c_language) {
        if (!options.prefix) options.prefix = "scc_";
        if (!options.data_type_name) options.data_type_name = "scc_syntax_node";
        if (!options.slex_prefix) options.slex_prefix = "slex_";
        if (!options.slex_data_type_name) options.slex_data_type_name = "slex_segment";
        if (!options.slex_header) options.slex_header = "slex_generated.h";
    } else {
        if (!options.namespace_name) options.namespace_name = "SCCGenerated";
        if (!options.class_name) options.class_name = "SCC";
        if (!options.data_type_name) options.data_type_name = "SyntaxNode";
        if (!options.slex_namespace_name) options.slex_namespace_name = "SLexGenerated";
        if (!options.slex_class_name) options.slex_class_name = "SLex";
        if (!options.slex_data_type_name) options.slex_data_type_name = "Segment";
        if (!options.prefix) options.prefix = "";
    }

    FILE *f = fopen(input_file, "r");
    if (!f) {
        fprintf(stderr, "Error: Could not open input file '%s'\n", input_file);
        return 1;
    }

    scc_rules rules;
    bool rules_ok = scc_read_rule_from_file(f, &rules);
    fclose(f);
    if (!rules_ok) {
        fprintf(stderr, "Error: Failed to parse rules from input file '%s'\n", input_file);
        return 1;
    }

    FILE *out = fopen(output_file, "w");
    if (!out) {
        fprintf(stderr, "Error: Could not open output file '%s'\n", output_file);
        return 1;
    }

    bool translated = scc_translate_to_file(&options, &rules, out);
    // fclose flushes buffered output; if it fails the file on disk is
    // incomplete, so it must not be reported as a success.
    bool closed = (fclose(out) == 0);

    if (!translated) {
        fprintf(stderr, "Error: Parser translation failed\n");
        remove(output_file);
        return 1;
    }
    if (!closed) {
        fprintf(stderr, "Error: Failed to write output file '%s'\n", output_file);
        remove(output_file);
        return 1;
    }

    printf("Success: Generated parser written to '%s'\n", output_file);
    return 0;
}
+6 -1
View File
@@ -1,6 +1,11 @@
#include "../Headers/scc_core.h"
// Dispatches parser generation to the translator for options->target_language.
//
// Returns false when any argument is NULL or the target language is neither
// c_language nor csharp; otherwise returns the chosen translator's result.
bool scc_translate_to_file(scc_options *options, scc_rules *rules, FILE *output_file){
    if (options == NULL || rules == NULL || output_file == NULL) {
        return false;
    }

    switch (options->target_language) {
    case c_language:
        return scc_translate_to_file_c(options, rules, output_file);
    case csharp:
        return scc_translate_to_file_csharp(options, rules, output_file);
    default:
        return false;
    }
}
+216
View File
@@ -0,0 +1,216 @@
#include "../Headers/scc_core.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Helper: strip whitespace from both ends of str, in place.
// Returns a pointer into str at its first non-whitespace character; trailing
// whitespace is cut off by writing a terminator into the buffer. A string that
// is empty or all whitespace yields a pointer to its terminator, unmodified.
static char* trim_whitespace(char* str) {
    char *first = str;
    for (; isspace((unsigned char)*first); first++) {
        // skip leading whitespace
    }

    size_t remaining = strlen(first);
    if (remaining == 0) {
        return first;
    }

    // first[0] is not whitespace, so this always stops with remaining >= 1.
    while (remaining > 1 && isspace((unsigned char)first[remaining - 1])) {
        remaining--;
    }
    first[remaining] = '\0';
    return first;
}
// Helper: copy the next line at *cursor into line_buf and advance *cursor.
// A line ends at '\r', '\n' or the end of the string; one "\r", "\n" or "\r\n"
// terminator is consumed and not copied. At most max_len - 1 characters are
// copied, so a longer line is delivered in several consecutive chunks.
// Returns false (leaving line_buf untouched) once *cursor is at the terminator.
static bool get_next_line(char** cursor, char* line_buf, int max_len) {
    char *pos = *cursor;
    if (pos[0] == '\0') {
        return false;
    }

    size_t line_len = strcspn(pos, "\r\n");
    size_t room = (max_len > 1) ? (size_t)(max_len - 1) : 0;
    size_t take = (line_len < room) ? line_len : room;

    memcpy(line_buf, pos, take);
    line_buf[take] = '\0';
    pos += take;

    // Skip newline characters
    if (*pos == '\r') {
        pos++;
    }
    if (*pos == '\n') {
        pos++;
    }

    *cursor = pos;
    return true;
}
// Helper: split str into whitespace-separated tokens, respecting quotes.
//
// A token that starts with '"' or '\'' runs up to and including the matching
// closing quote (or to the end of the string when unterminated), so it may
// contain whitespace; the quote characters are kept in the token. Any other
// token runs up to the next whitespace character.
//
// On return *out_tokens is a heap array of *out_count heap strings, both owned
// by the caller (NULL and 0 when str has no tokens). If an allocation fails,
// everything collected so far is released and NULL / 0 is reported.
static void split_tokens(const char* str, char*** out_tokens, uint64_t* out_count) {
    char **tokens = NULL;
    uint64_t count = 0;
    uint64_t capacity = 0;
    const char *p = str;

    for (;;) {
        while (isspace((unsigned char)*p)) p++;
        if (*p == '\0') break;

        const char *start = p;
        if (*p == '"' || *p == '\'') {
            const char quote = *p++;
            while (*p != '\0' && *p != quote) p++;
            if (*p == quote) p++;
        } else {
            while (*p != '\0' && !isspace((unsigned char)*p)) p++;
        }

        if (count == capacity) {
            // Grow geometrically; keep the old array valid if realloc fails.
            uint64_t grown = capacity ? capacity * 2 : 8;
            char **resized = realloc(tokens, grown * sizeof *tokens);
            if (!resized) goto fail;
            tokens = resized;
            capacity = grown;
        }

        size_t len = (size_t)(p - start);
        char *token = malloc(len + 1);
        if (!token) goto fail;
        memcpy(token, start, len);
        token[len] = '\0';
        tokens[count++] = token;
    }

    *out_tokens = tokens;
    *out_count = count;
    return;

fail:
    for (uint64_t i = 0; i < count; i++) {
        free(tokens[i]);
    }
    free(tokens);
    *out_tokens = NULL;
    *out_count = 0;
}
bool scc_read_rule_from_cstr(char *content, scc_rules *output_rule) {
if (!content || !output_rule) return false;
// Initialize output structure
output_rule->rules = NULL;
output_rule->rule_count = 0;
output_rule->syntax_ids = NULL;
output_rule->syntax_id_count = 0;
typedef enum {
STATE_NONE,
STATE_SYNTAX_IDS,
STATE_RULES
} ParserState;
ParserState state = STATE_NONE;
scc_rule* current_rule = NULL;
char* cursor = content;
char line[4096];
while (get_next_line(&cursor, line, sizeof(line))) {
char* trimmed = trim_whitespace(line);
// Skip comments and empty lines
if (trimmed[0] == '\0' || trimmed[0] == '#' || (trimmed[0] == '/' && trimmed[1] == '/')) {
continue;
}
// Section switches
if (strcmp(trimmed, "syntax_ids:") == 0) {
state = STATE_SYNTAX_IDS;
continue;
} else if (strcmp(trimmed, "rules:") == 0) {
state = STATE_RULES;
continue;
}
if (state == STATE_SYNTAX_IDS) {
// Trimmed line is a syntax ID
output_rule->syntax_id_count++;
output_rule->syntax_ids = (char**)realloc(output_rule->syntax_ids, output_rule->syntax_id_count * sizeof(char*));
output_rule->syntax_ids[output_rule->syntax_id_count - 1] = strdup(trimmed);
} else if (state == STATE_RULES) {
if (strcmp(trimmed, ";") == 0) {
current_rule = NULL;
continue;
}
if (trimmed[0] == ':' || trimmed[0] == '|') {
if (current_rule) {
char* match_part = trimmed + 1;
char** match_tokens = NULL;
uint64_t match_token_count = 0;
split_tokens(match_part, &match_tokens, &match_token_count);
scc_matching* matching = (scc_matching*)malloc(sizeof(scc_matching));
matching->match_ids = match_tokens;
matching->match_id_count = match_token_count;
matching->target_syntax_id = NULL;
matching->using_match_id = NULL;
matching->using_match_id_count = 0;
current_rule->matching_count++;
current_rule->matchings = (scc_matching**)realloc(current_rule->matchings, current_rule->matching_count * sizeof(scc_matching*));
current_rule->matchings[current_rule->matching_count - 1] = matching;
}
} else if (strncmp(trimmed, "=>", 2) == 0) {
if (current_rule && current_rule->matching_count > 0) {
scc_matching* matching = current_rule->matchings[current_rule->matching_count - 1];
char* op_part = trimmed + 2;
char** op_tokens = NULL;
uint64_t op_token_count = 0;
split_tokens(op_part, &op_tokens, &op_token_count);
if (op_token_count > 0) {
if (strcmp(op_tokens[0], "new_node") == 0) {
if (op_token_count > 1) {
matching->target_syntax_id = strdup(op_tokens[1]);
if (op_token_count > 2) {
matching->using_match_id_count = op_token_count - 2;
matching->using_match_id = (char**)malloc(matching->using_match_id_count * sizeof(char*));
for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
matching->using_match_id[i] = strdup(op_tokens[2 + i]);
}
}
}
} else if (strcmp(op_tokens[0], "append_as_child") == 0) {
matching->target_syntax_id = strdup("append_as_child");
if (op_token_count > 1) {
matching->using_match_id_count = op_token_count - 1;
matching->using_match_id = (char**)malloc(matching->using_match_id_count * sizeof(char*));
for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
matching->using_match_id[i] = strdup(op_tokens[1 + i]);
}
}
} else if (strcmp(op_tokens[0], "skip") == 0) {
matching->target_syntax_id = strdup("skip");
}
}
// Free op_tokens
for (uint64_t i = 0; i < op_token_count; i++) {
free(op_tokens[i]);
}
free(op_tokens);
}
} else {
// Defines a new rule (node_type_name)
output_rule->rule_count++;
output_rule->rules = (scc_rule*)realloc(output_rule->rules, output_rule->rule_count * sizeof(scc_rule));
current_rule = &output_rule->rules[output_rule->rule_count - 1];
current_rule->node_type_name = strdup(trimmed);
current_rule->matchings = NULL;
current_rule->matching_count = 0;
}
}
}
return true;
}
// Reads the whole of stream `f` into memory and parses it with
// scc_read_rule_from_cstr.
//
// The stream must be seekable: its size is determined with fseek/ftell and it
// is rewound to the start before reading. Returns false when an argument is
// NULL, when seeking, sizing, allocating or reading fails, or when parsing
// fails. The stream is not closed.
bool scc_read_rule_from_file(FILE *f, scc_rules *output_rule) {
    if (!f || !output_rule) return false;

    // Determine file size
    if (fseek(f, 0, SEEK_END) != 0) return false;
    long size = ftell(f);
    // ftell reports failure as -1; using that as a length would overflow the buffer.
    if (size < 0) return false;
    if (fseek(f, 0, SEEK_SET) != 0) return false;

    char *content = malloc((size_t)size + 1);
    if (!content) return false;

    // In text mode fewer bytes than `size` may be delivered; only a stream
    // error is treated as a failure.
    size_t read_bytes = fread(content, 1, (size_t)size, f);
    if (read_bytes < (size_t)size && ferror(f)) {
        free(content);
        return false;
    }
    content[read_bytes] = '\0';

    bool success = scc_read_rule_from_cstr(content, output_rule);
    free(content);
    return success;
}