diff --git a/Headers/scc_core.h b/Headers/scc_core.h
index 81aa32e..1926105 100644
--- a/Headers/scc_core.h
+++ b/Headers/scc_core.h
@@ -20,6 +20,7 @@ typedef struct scc_options
     char *slex_class_name;
     char *slex_prefix;
     char *slex_data_type_name;
+    char *slex_header; /* header named in the generated C parser's slex #include; NULL or "" selects "slex_generated.h" */
 } scc_options;
 
 typedef struct scc_matching
diff --git a/Source/Languages/C/scc_template.c b/Source/Languages/C/scc_template.c
index fd65416..fb4e479 100644
--- a/Source/Languages/C/scc_template.c
+++ b/Source/Languages/C/scc_template.c
@@ -1,6 +1,512 @@
 #include "../../../Headers/scc_core.h"
+/* NOTE(review): the include targets were missing from the reviewed copy of this patch; this set is reconstructed from the calls made below -- confirm. */
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
-bool scc_translate_to_file_c(scc_options *options, scc_rules *rules, FILE *output_file){
-    //Stub for now.
+/* Returns true when `name` equals the node_type_name of one of the rules. */
+static bool is_rule_name(scc_rules* rules, const char* name) {
+    for (uint64_t i = 0; i < rules->rule_count; i++) {
+        if (strcmp(rules->rules[i].node_type_name, name) == 0) {
+            return true;
+        }
+    }
     return false;
+}
+
+/* Returns true when `name` is listed in rules->syntax_ids. It has no caller in this file yet. */
+static bool is_syntax_id(scc_rules* rules, const char* name) {
+    for (uint64_t i = 0; i < rules->syntax_id_count; i++) {
+        if (strcmp(rules->syntax_ids[i], name) == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Writes `str` to `f` as a double-quoted literal, putting a backslash before each '"' and each backslash. */
+static void print_safe_c_string(FILE* f, const char* str) {
+    fputc('"', f);
+    for (size_t i = 0; str[i] != '\0'; i++) {
+        if (str[i] == '"') {
+            fputs("\\\"", f);
+        } else if (str[i] == '\\') {
+            fputs("\\\\", f);
+        } else {
+            fputc(str[i], f);
+        }
+    }
+    fputc('"', f);
+}
+
+/*
+ * Writes the generated parser's declarations to `f`: three standard includes,
+ * a forward declaration of the slex segment type, the syntax-id and node-type
+ * enums, the node, enclosure and rule-table structs, and the <prefix>parse and
+ * <prefix>free prototypes. NULL option strings fall back to built-in defaults.
+ */
+static void generate_declarations(FILE* f, scc_options* options, scc_rules* rules) {
+    const char* prefix = options->prefix ? options->prefix : "scc_";
+    const char* data_type = options->data_type_name ? options->data_type_name : "scc_syntax_node";
+    const char* slex_data_type = options->slex_data_type_name ? options->slex_data_type_name : "slex_segment";
+
+    // NOTE(review): the three header names were missing from the reviewed copy; these are a reconstruction -- confirm.
+    fprintf(f, "#include <stdint.h>\n");
+    fprintf(f, "#include <stdbool.h>\n");
+    fprintf(f, "#include <stddef.h>\n\n");
+
+    // Forward declare slex segment
+    fprintf(f, "struct %s;\n", slex_data_type);
+    fprintf(f, "typedef struct %s %s;\n\n", slex_data_type, slex_data_type);
+
+    // scc_syntax_id enum
+    fprintf(f, "typedef enum %ssyntax_id {\n", prefix);
+    fprintf(f, " %sid_default = 0,\n", prefix);
+    for (uint64_t i = 0; i < rules->syntax_id_count; i++) {
+        fprintf(f, " %sid_%s,\n", prefix, rules->syntax_ids[i]);
+    }
+    fprintf(f, "} %ssyntax_id;\n\n", prefix);
+
+    // scc_syntax_node_type enum
+    fprintf(f, "typedef enum %ssyntax_node_type {\n", prefix);
+    fprintf(f, " %snode,\n", prefix);
+    fprintf(f, " %ssegment\n", prefix);
+    fprintf(f, "} %ssyntax_node_type;\n\n", prefix);
+
+    // scc_syntax_node_enclosure struct forward decl
+    fprintf(f, "struct %ssyntax_node_enclosure;\n\n", prefix);
+
+    // scc_syntax_node struct
+    fprintf(f, "typedef struct %s {\n", data_type);
+    fprintf(f, " enum %ssyntax_id id;\n", prefix);
+    fprintf(f, " char* syntax_name;\n");
+    fprintf(f, " struct %ssyntax_node_enclosure* children;\n", prefix);
+    fprintf(f, " uint64_t child_count;\n");
+    fprintf(f, " struct %s* parent;\n", data_type);
+    fprintf(f, "} %s;\n\n", data_type);
+
+    // scc_syntax_node_enclosure struct
+    fprintf(f, "typedef struct %ssyntax_node_enclosure {\n", prefix);
+    fprintf(f, " enum %ssyntax_node_type type;\n", prefix);
+    fprintf(f, " void* data;\n");
+    fprintf(f, "} %ssyntax_node_enclosure;\n\n", prefix);
+
+    // Matching structures definitions
+    fprintf(f, "typedef struct %smatching_rule_def {\n", prefix);
+    fprintf(f, " const char* target_syntax_id;\n");
+    fprintf(f, " const char** match_ids;\n");
+    fprintf(f, " uint64_t match_id_count;\n");
+    fprintf(f, " const char** using_match_ids;\n");
+    fprintf(f, " uint64_t using_match_id_count;\n");
+    fprintf(f, "} %smatching_rule_def;\n\n", prefix);
+
+    fprintf(f, "typedef struct %srule_def {\n", prefix);
+    fprintf(f, " const char* node_type_name;\n");
+    fprintf(f, " const struct %smatching_rule_def* matchings;\n", prefix);
+    fprintf(f, " uint64_t matching_count;\n");
+    fprintf(f, "} %srule_def;\n\n", prefix);
+
+    // Function declarations
+    fprintf(f, "char %sparse(%s* head, %s** output);\n", prefix, slex_data_type, data_type);
+    fprintf(f, "char %sfree(%s* root);\n\n", prefix, data_type);
+}
+
+/*
+ * Writes the C parser generated from `rules` to `output_file`. With a non-empty
+ * options->header_output the declarations go to that file and the
+ * implementation includes it by base name; otherwise they are emitted inline.
+ * Returns false for NULL arguments, an empty rule set, a header file that
+ * cannot be opened or closed, or a failed allocation; true otherwise.
+ */
+bool scc_translate_to_file_c(scc_options *options, scc_rules *rules, FILE *output_file) {
+    if (!options || !rules || !output_file) return false;
+    // The emitted parse_step accepts on rules->rules[0], so an empty rule set cannot be translated.
+    if (rules->rule_count == 0) return false;
+
+    const char* prefix = options->prefix ? options->prefix : "scc_";
+    const char* data_type = options->data_type_name ? options->data_type_name : "scc_syntax_node";
+    const char* slex_prefix = options->slex_prefix ? options->slex_prefix : "slex_";
+    const char* slex_data_type = options->slex_data_type_name ? options->slex_data_type_name : "slex_segment";
+
+    // 1. Write the header file if requested
+    char* header_base = NULL;
+    if (options->header_output && strlen(options->header_output) > 0) {
+        // Base name of the header path: everything after the last '/' or '\\', including one at index 0.
+        const char* base = options->header_output;
+        const char* last_slash = strrchr(base, '/');
+        const char* last_backslash = strrchr(base, '\\');
+        if (last_slash) base = last_slash + 1;
+        if (last_backslash && last_backslash >= base) base = last_backslash + 1;
+        header_base = strdup(base);
+        if (!header_base) return false;
+
+        FILE* header_f = fopen(options->header_output, "w");
+        if (!header_f) {
+            free(header_base);
+            return false;
+        }
+
+        fprintf(header_f, "#ifndef __SCC_GENERATED_H__\n");
+        fprintf(header_f, "#define __SCC_GENERATED_H__\n\n");
+        generate_declarations(header_f, options, rules);
+        fprintf(header_f, "#endif\n");
+        if (fclose(header_f) != 0) {
+            free(header_base);
+            return false;
+        }
+    }
+
+    // 2. Write implementation
+    // NOTE(review): the four header names were missing from the reviewed copy; these are a reconstruction -- confirm.
+    fprintf(output_file, "#include <stdbool.h>\n");
+    fprintf(output_file, "#include <stdio.h>\n");
+    fprintf(output_file, "#include <stdlib.h>\n");
+    fprintf(output_file, "#include <string.h>\n");
+    if (options->slex_header && strlen(options->slex_header) > 0) {
+        fprintf(output_file, "#include \"%s\"\n\n", options->slex_header);
+    } else {
+        fprintf(output_file, "#include \"slex_generated.h\"\n\n");
+    }
+
+    if (header_base) {
+        fprintf(output_file, "#include \"%s\"\n\n", header_base);
+        free(header_base);
+    } else {
+        generate_declarations(output_file, options, rules);
+    }
+
+    // Generate unique slex tags/ids list used as terminals
+    // We scan all rules' matchings to collect unique non-rule, non-quoted match_ids
+    char** terminals = NULL;
+    uint64_t terminal_count = 0;
+    for (uint64_t r = 0; r < rules->rule_count; r++) {
+        scc_rule* rule = &rules->rules[r];
+        for (uint64_t m = 0; m < rule->matching_count; m++) {
+            scc_matching* matching = rule->matchings[m];
+            for (uint64_t i = 0; i < matching->match_id_count; i++) {
+                char* match_id = matching->match_ids[i];
+                size_t len = strlen(match_id);
+                bool is_literal = len >= 2 && ((match_id[0] == '"' && match_id[len-1] == '"') || (match_id[0] == '\'' && match_id[len-1] == '\''));
+                if (!is_literal && !is_rule_name(rules, match_id)) {
+                    // Check if already in list
+                    bool exists = false;
+                    for (uint64_t j = 0; j < terminal_count; j++) {
+                        if (strcmp(terminals[j], match_id) == 0) {
+                            exists = true;
+                            break;
+                        }
+                    }
+                    if (!exists) {
+                        char** grown = realloc(terminals, (terminal_count + 1) * sizeof *grown);
+                        if (!grown) {
+                            free(terminals);
+                            return false;
+                        }
+                        terminals = grown;
+                        terminals[terminal_count++] = match_id;
+                    }
+                }
+            }
+        }
+    }
+
+    // Helper functions for matching
+    fprintf(output_file, "static bool match_element(const %ssyntax_node_enclosure* enc, const char* pattern) {\n", prefix);
+    fprintf(output_file, " if (!enc || !pattern) return false;\n");
+    fprintf(output_file, " if (enc->type == %snode) {\n", prefix);
+    fprintf(output_file, " %s* node = (%s*)enc->data;\n", data_type, data_type);
+    fprintf(output_file, " return strcmp(node->syntax_name, pattern) == 0;\n");
+    fprintf(output_file, " } else {\n");
+    fprintf(output_file, " %s* segment = (%s*)enc->data;\n", slex_data_type, slex_data_type);
+    fprintf(output_file, " size_t len = strlen(pattern);\n");
+    fprintf(output_file, " if (len >= 2 && ((pattern[0] == '\"' && pattern[len-1] == '\"') || (pattern[0] == '\\'' && pattern[len-1] == '\\''))) {\n");
+    fprintf(output_file, " return (segment->length == (int64_t)(len - 2)) && (strncmp(segment->head, pattern + 1, len - 2) == 0);\n");
+    fprintf(output_file, " }\n");
+    for (uint64_t i = 0; i < terminal_count; i++) {
+        fprintf(output_file, " if (strcmp(pattern, \"%s\") == 0) {\n", terminals[i]);
+        fprintf(output_file, " return segment->tag == %stag_%s || segment->id == %sid_%s;\n", slex_prefix, terminals[i], slex_prefix, terminals[i]);
+        fprintf(output_file, " }\n");
+    }
+    fprintf(output_file, " return false;\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, "}\n\n");
+
+    free(terminals);
+
+    // get_syntax_id function
+    fprintf(output_file, "static enum %ssyntax_id get_syntax_id(const char* name) {\n", prefix);
+    for (uint64_t i = 0; i < rules->syntax_id_count; i++) {
+        fprintf(output_file, " if (strcmp(name, \"%s\") == 0) return %sid_%s;\n", rules->syntax_ids[i], prefix, rules->syntax_ids[i]);
+    }
+    fprintf(output_file, " return %sid_default;\n", prefix);
+    fprintf(output_file, "}\n\n");
+
+    // clone_segment function
+    fprintf(output_file, "static %s* clone_segment(const %s* src) {\n", slex_data_type, slex_data_type);
+    fprintf(output_file, " if (!src) return NULL;\n");
+    fprintf(output_file, " %s* dst = (%s*)malloc(sizeof(%s));\n", slex_data_type, slex_data_type, slex_data_type);
+    fprintf(output_file, " dst->head = src->head ? strdup(src->head) : NULL;\n");
+    fprintf(output_file, " dst->length = src->length;\n");
+    fprintf(output_file, " dst->file_name = src->file_name ? strdup(src->file_name) : NULL;\n");
+    fprintf(output_file, " dst->line = src->line;\n");
+    fprintf(output_file, " dst->col = src->col;\n");
+    fprintf(output_file, " dst->tag = src->tag;\n");
+    fprintf(output_file, " dst->id = src->id;\n");
+    fprintf(output_file, " dst->prev = NULL;\n");
+    fprintf(output_file, " dst->next = NULL;\n");
+    fprintf(output_file, " return dst;\n");
+    fprintf(output_file, "}\n\n");
+
+    // free_segment function
+    fprintf(output_file, "static void free_segment(%s* seg) {\n", slex_data_type);
+    fprintf(output_file, " if (!seg) return;\n");
+    fprintf(output_file, " if (seg->head) free(seg->head);\n");
+    fprintf(output_file, " if (seg->file_name) free(seg->file_name);\n");
+    fprintf(output_file, " free(seg);\n");
+    fprintf(output_file, "}\n\n");
+
+    // clone_tree and free_tree functions forward declaration
+    fprintf(output_file, "static %s* clone_tree(const %s* src);\n", data_type, data_type);
+    fprintf(output_file, "static void free_tree(%s* node);\n\n", data_type);
+
+    // clone_tree implementation
+    fprintf(output_file, "static %s* clone_tree(const %s* src) {\n", data_type, data_type);
+    fprintf(output_file, " if (!src) return NULL;\n");
+    fprintf(output_file, " %s* dst = (%s*)malloc(sizeof(%s));\n", data_type, data_type, data_type);
+    fprintf(output_file, " dst->id = src->id;\n");
+    fprintf(output_file, " dst->syntax_name = src->syntax_name ? strdup(src->syntax_name) : NULL;\n");
+    fprintf(output_file, " dst->child_count = src->child_count;\n");
+    fprintf(output_file, " if (dst->child_count > 0) {\n");
+    fprintf(output_file, " dst->children = (%ssyntax_node_enclosure*)malloc(dst->child_count * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix);
+    fprintf(output_file, " for (uint64_t i = 0; i < dst->child_count; i++) {\n");
+    fprintf(output_file, " dst->children[i].type = src->children[i].type;\n");
+    fprintf(output_file, " if (src->children[i].type == %snode) {\n", prefix);
+    fprintf(output_file, " dst->children[i].data = clone_tree((%s*)src->children[i].data);\n", data_type);
+    fprintf(output_file, " ((%s*)dst->children[i].data)->parent = dst;\n", data_type);
+    fprintf(output_file, " } else {\n");
+    fprintf(output_file, " dst->children[i].data = clone_segment((%s*)src->children[i].data);\n", slex_data_type);
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " } else {\n");
+    fprintf(output_file, " dst->children = NULL;\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " dst->parent = NULL;\n");
+    fprintf(output_file, " return dst;\n");
+    fprintf(output_file, "}\n\n");
+
+    // free_tree implementation
+    fprintf(output_file, "static void free_tree(%s* node) {\n", data_type);
+    fprintf(output_file, " if (!node) return;\n");
+    fprintf(output_file, " if (node->syntax_name) free(node->syntax_name);\n");
+    fprintf(output_file, " for (uint64_t i = 0; i < node->child_count; i++) {\n");
+    fprintf(output_file, " if (node->children[i].type == %snode) {\n", prefix);
+    fprintf(output_file, " free_tree((%s*)node->children[i].data);\n", data_type);
+    fprintf(output_file, " } else {\n");
+    fprintf(output_file, " free_segment((%s*)node->children[i].data);\n", slex_data_type);
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " if (node->children) free(node->children);\n");
+    fprintf(output_file, " free(node);\n");
+    fprintf(output_file, "}\n\n");
+
+    // Generate matching rules tables
+    for (uint64_t r = 0; r < rules->rule_count; r++) {
+        scc_rule* rule = &rules->rules[r];
+        for (uint64_t m = 0; m < rule->matching_count; m++) {
+            scc_matching* matching = rule->matchings[m];
+
+            fprintf(output_file, "static const char* match_ids_%d_%d[] = {", (int)r, (int)m);
+            for (uint64_t i = 0; i < matching->match_id_count; i++) {
+                print_safe_c_string(output_file, matching->match_ids[i]);
+                if (i < matching->match_id_count - 1) fprintf(output_file, ", ");
+            }
+            // An empty initializer list is not valid ISO C, so an empty table gets one NULL slot; its emitted count stays 0.
+            if (matching->match_id_count == 0) fprintf(output_file, "NULL");
+            fprintf(output_file, "};\n");
+
+            fprintf(output_file, "static const char* using_match_ids_%d_%d[] = {", (int)r, (int)m);
+            for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
+                print_safe_c_string(output_file, matching->using_match_id[i]);
+                if (i < matching->using_match_id_count - 1) fprintf(output_file, ", ");
+            }
+            if (matching->using_match_id_count == 0) fprintf(output_file, "NULL");
+            fprintf(output_file, "};\n");
+        }
+
+        if (rule->matching_count > 0) {
+            fprintf(output_file, "static const %smatching_rule_def matchings_%d[] = {\n", prefix, (int)r);
+            for (uint64_t m = 0; m < rule->matching_count; m++) {
+                scc_matching* matching = rule->matchings[m];
+                fprintf(output_file, " { ");
+                print_safe_c_string(output_file, matching->target_syntax_id ? matching->target_syntax_id : "skip");
+                fprintf(output_file, ", match_ids_%d_%d, %d, using_match_ids_%d_%d, %d }",
+                        (int)r, (int)m, (int)matching->match_id_count,
+                        (int)r, (int)m, (int)matching->using_match_id_count);
+                if (m < rule->matching_count - 1) fprintf(output_file, ",\n");
+            }
+            fprintf(output_file, "\n};\n");
+        }
+    }
+
+    // RULES array
+    fprintf(output_file, "static const %srule_def RULES[] = {\n", prefix);
+    for (uint64_t r = 0; r < rules->rule_count; r++) {
+        scc_rule* rule = &rules->rules[r];
+        if (rule->matching_count > 0) {
+            fprintf(output_file, " { ");
+            print_safe_c_string(output_file, rule->node_type_name);
+            fprintf(output_file, ", matchings_%d, %d }", (int)r, (int)rule->matching_count);
+        } else {
+            fprintf(output_file, " { ");
+            print_safe_c_string(output_file, rule->node_type_name);
+            fprintf(output_file, ", NULL, 0 }");
+        }
+        if (r < rules->rule_count - 1) fprintf(output_file, ",\n");
+    }
+    fprintf(output_file, "\n};\n\n");
+
+    // parse_step function
+    fprintf(output_file, "static bool parse_step(%ssyntax_node_enclosure* stack, uint64_t stack_len, %s* input, %s** result, int depth) {\n", prefix, slex_data_type, data_type);
+    fprintf(output_file, " if (depth > 2000) return false;\n\n");
+    fprintf(output_file, " if (input == NULL) {\n");
+    fprintf(output_file, " if (stack_len == 1 && stack[0].type == %snode) {\n", prefix);
+    fprintf(output_file, " %s* node = (%s*)stack[0].data;\n", data_type, data_type);
+    fprintf(output_file, " if (strcmp(node->syntax_name, \"%s\") == 0) {\n", rules->rules[0].node_type_name);
+    fprintf(output_file, " *result = node;\n");
+    fprintf(output_file, " return true;\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n\n");
+
+    // Try reductions
+    fprintf(output_file, " for (uint64_t r = 0; r < %d; r++) {\n", (int)rules->rule_count);
+    fprintf(output_file, " const %srule_def* rule = &RULES[r];\n", prefix);
+    fprintf(output_file, " for (uint64_t m = 0; m < rule->matching_count; m++) {\n");
+    fprintf(output_file, " const %smatching_rule_def* match = &rule->matchings[m];\n", prefix);
+    fprintf(output_file, " if (stack_len >= match->match_id_count) {\n");
+    fprintf(output_file, " bool match_ok = true;\n");
+    fprintf(output_file, " uint64_t stack_offset = stack_len - match->match_id_count;\n");
+    fprintf(output_file, " for (uint64_t i = 0; i < match->match_id_count; i++) {\n");
+    fprintf(output_file, " if (!match_element(&stack[stack_offset + i], match->match_ids[i])) {\n");
+    fprintf(output_file, " match_ok = false;\n");
+    fprintf(output_file, " break;\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " if (match_ok) {\n");
+    fprintf(output_file, " uint64_t new_stack_len = stack_offset;\n");
+    fprintf(output_file, " bool skip_push = false;\n");
+    fprintf(output_file, " %ssyntax_node_enclosure reduced_enc;\n", prefix);
+    fprintf(output_file, " reduced_enc.type = %snode;\n", prefix);
+    fprintf(output_file, " reduced_enc.data = NULL;\n\n");
+
+    fprintf(output_file, " if (strcmp(match->target_syntax_id, \"skip\") == 0) {\n");
+    fprintf(output_file, " skip_push = true;\n");
+    fprintf(output_file, " } else if (strcmp(match->target_syntax_id, \"append_as_child\") == 0) {\n");
+    fprintf(output_file, " if (match->match_id_count >= 1 && stack[stack_offset].type == %snode) {\n", prefix);
+    fprintf(output_file, " %s* parent_node = (%s*)stack[stack_offset].data;\n", data_type, data_type);
+    fprintf(output_file, " %s* cloned_parent = clone_tree(parent_node);\n", data_type);
+    fprintf(output_file, " if (match->using_match_id_count >= 1) {\n");
+    fprintf(output_file, " int child_idx = atoi(match->using_match_ids[0] + 1);\n");
+    fprintf(output_file, " %ssyntax_node_enclosure child_enc = stack[stack_offset + child_idx];\n", prefix);
+    fprintf(output_file, " if (child_enc.type == %snode) {\n", prefix);
+    fprintf(output_file, " child_enc.data = clone_tree((%s*)child_enc.data);\n", data_type);
+    fprintf(output_file, " } else {\n");
+    fprintf(output_file, " child_enc.data = clone_segment((%s*)child_enc.data);\n", slex_data_type);
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " cloned_parent->child_count++;\n");
+    fprintf(output_file, " cloned_parent->children = (%ssyntax_node_enclosure*)realloc(cloned_parent->children, cloned_parent->child_count * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix);
+    fprintf(output_file, " cloned_parent->children[cloned_parent->child_count - 1] = child_enc;\n");
+    fprintf(output_file, " if (child_enc.type == %snode) {\n", prefix);
+    fprintf(output_file, " ((%s*)child_enc.data)->parent = cloned_parent;\n", data_type);
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " reduced_enc.data = cloned_parent;\n");
+    fprintf(output_file, " new_stack_len++;\n");
+    fprintf(output_file, " } else {\n");
+    fprintf(output_file, " match_ok = false;\n");
+    fprintf(output_file, " }\n");
+
+    fprintf(output_file, " } else {\n");
+    fprintf(output_file, " %s* new_node = (%s*)malloc(sizeof(%s));\n", data_type, data_type, data_type);
+    fprintf(output_file, " new_node->id = get_syntax_id(match->target_syntax_id);\n");
+    fprintf(output_file, " new_node->syntax_name = strdup(rule->node_type_name);\n");
+    fprintf(output_file, " new_node->child_count = match->using_match_id_count;\n");
+    fprintf(output_file, " new_node->children = new_node->child_count > 0 ? (%ssyntax_node_enclosure*)malloc(new_node->child_count * sizeof(%ssyntax_node_enclosure)) : NULL;\n", prefix, prefix);
+    fprintf(output_file, " new_node->parent = NULL;\n");
+    fprintf(output_file, " for (uint64_t i = 0; i < match->using_match_id_count; i++) {\n");
+    fprintf(output_file, " int idx = atoi(match->using_match_ids[i] + 1);\n");
+    fprintf(output_file, " %ssyntax_node_enclosure child_enc = stack[stack_offset + idx];\n", prefix);
+    fprintf(output_file, " if (child_enc.type == %snode) {\n", prefix);
+    fprintf(output_file, " child_enc.data = clone_tree((%s*)child_enc.data);\n", data_type);
+    fprintf(output_file, " } else {\n");
+    fprintf(output_file, " child_enc.data = clone_segment((%s*)child_enc.data);\n", slex_data_type);
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " new_node->children[i] = child_enc;\n");
+    fprintf(output_file, " if (child_enc.type == %snode) {\n", prefix);
+    fprintf(output_file, " ((%s*)child_enc.data)->parent = new_node;\n", data_type);
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " reduced_enc.data = new_node;\n");
+    fprintf(output_file, " new_stack_len++;\n");
+    fprintf(output_file, " }\n\n");
+
+    fprintf(output_file, " if (match_ok) {\n");
+    fprintf(output_file, " %ssyntax_node_enclosure* new_stack = (%ssyntax_node_enclosure*)malloc((new_stack_len + 1) * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix, prefix);
+    fprintf(output_file, " for (uint64_t i = 0; i < stack_offset; i++) {\n");
+    fprintf(output_file, " new_stack[i] = stack[i];\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " if (!skip_push) {\n");
+    fprintf(output_file, " new_stack[new_stack_len - 1] = reduced_enc;\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " if (parse_step(new_stack, new_stack_len, input, result, depth + 1)) {\n");
+    fprintf(output_file, " free(new_stack);\n");
+    fprintf(output_file, " return true;\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " if (!skip_push && reduced_enc.data) {\n");
+    fprintf(output_file, " free_tree((%s*)reduced_enc.data);\n", data_type);
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " free(new_stack);\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " }\n\n");
+
+    // Try shift
+    fprintf(output_file, " if (input != NULL) {\n");
+    fprintf(output_file, " uint64_t new_stack_len = stack_len + 1;\n");
+    fprintf(output_file, " %ssyntax_node_enclosure* new_stack = (%ssyntax_node_enclosure*)malloc(new_stack_len * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix, prefix);
+    fprintf(output_file, " for (uint64_t i = 0; i < stack_len; i++) {\n");
+    fprintf(output_file, " new_stack[i] = stack[i];\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " new_stack[stack_len].type = %ssegment;\n", prefix);
+    fprintf(output_file, " new_stack[stack_len].data = clone_segment(input);\n");
+    fprintf(output_file, " if (parse_step(new_stack, new_stack_len, input->next, result, depth + 1)) {\n");
+    fprintf(output_file, " free(new_stack);\n");
+    fprintf(output_file, " return true;\n");
+    fprintf(output_file, " }\n");
+    fprintf(output_file, " free_segment((%s*)new_stack[stack_len].data);\n", slex_data_type);
+    fprintf(output_file, " free(new_stack);\n");
+    fprintf(output_file, " }\n\n");
+
+    fprintf(output_file, " return false;\n");
+    fprintf(output_file, "}\n\n");
+
+    // Implement public functions scc_parse and scc_free
+    fprintf(output_file, "char %sparse(%s* head, %s** output) {\n", prefix, slex_data_type, data_type);
+    fprintf(output_file, " if (!head || !output) return 0;\n");
+    fprintf(output_file, " *output = NULL;\n");
+    fprintf(output_file, " %ssyntax_node_enclosure* stack = NULL;\n", prefix);
+    fprintf(output_file, " bool success = parse_step(stack, 0, head, output, 0);\n");
+    fprintf(output_file, " return success ? 1 : 0;\n");
+    fprintf(output_file, "}\n\n");
+
+    fprintf(output_file, "char %sfree(%s* root) {\n", prefix, data_type);
+    fprintf(output_file, " if (!root) return 0;\n");
+    fprintf(output_file, " free_tree(root);\n");
+    fprintf(output_file, " return 1;\n");
+    fprintf(output_file, "}\n");
+
+    return true;
 }
\ No newline at end of file
diff --git a/Source/Languages/CSharp/scc_template.c b/Source/Languages/CSharp/scc_template.c
index d6e3902..839288e 100644
--- a/Source/Languages/CSharp/scc_template.c
+++ b/Source/Languages/CSharp/scc_template.c
@@ -1,6 +1,392 @@
 #include "../../../Headers/scc_core.h"
+/* NOTE(review): the include targets were missing from the reviewed copy of this patch; this set is reconstructed from the calls made below -- confirm. */
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
-bool scc_translate_to_file_csharp(scc_options *options, scc_rules *rules, FILE *output_file){
-    //Stub for now.
+/* Returns true when `name` equals the node_type_name of one of the rules. */
+static bool is_rule_name(scc_rules* rules, const char* name) {
+    for (uint64_t i = 0; i < rules->rule_count; i++) {
+        if (strcmp(rules->rules[i].node_type_name, name) == 0) {
+            return true;
+        }
+    }
     return false;
+}
+
+/* Writes `str` to `f` as a double-quoted literal, putting a backslash before each '"' and each backslash. */
+static void print_safe_c_string(FILE* f, const char* str) {
+    fputc('"', f);
+    for (size_t i = 0; str[i] != '\0'; i++) {
+        if (str[i] == '"') {
+            fputs("\\\"", f);
+        } else if (str[i] == '\\') {
+            fputs("\\\\", f);
+        } else {
+            fputc(str[i], f);
+        }
+    }
+    fputc('"', f);
+}
+
+bool scc_translate_to_file_csharp(scc_options *options, scc_rules *rules, FILE *output_file) {
+    if (!options || !rules || !output_file) return false;
+
+    char* ns_name = (options->namespace_name && strlen(options->namespace_name) > 0) ? options->namespace_name : "SCCGenerated";
+    char* class_name = (options->class_name && strlen(options->class_name) > 0) ? options->class_name : "SCC";
+    char* data_type = (options->data_type_name && strlen(options->data_type_name) > 0) ? options->data_type_name : "SyntaxNode";
+    char* prefix = options->prefix ? options->prefix : "";
+
+    char* slex_ns = (options->slex_namespace_name && strlen(options->slex_namespace_name) > 0) ?
options->slex_namespace_name : "SLexGenerated"; + char* slex_class = (options->slex_class_name && strlen(options->slex_class_name) > 0) ? options->slex_class_name : "SLex"; + char* slex_prefix = options->slex_prefix ? options->slex_prefix : ""; + char* slex_data_type = (options->slex_data_type_name && strlen(options->slex_data_type_name) > 0) ? options->slex_data_type_name : "Segment"; + + // 1. Gather all unique terminal tags/ids used in the rules + char** terminals = NULL; + uint64_t terminal_count = 0; + for (uint64_t r = 0; r < rules->rule_count; r++) { + scc_rule* rule = &rules->rules[r]; + for (uint64_t m = 0; m < rule->matching_count; m++) { + scc_matching* matching = rule->matchings[m]; + for (uint64_t i = 0; i < matching->match_id_count; i++) { + char* match_id = matching->match_ids[i]; + size_t len = strlen(match_id); + bool is_literal = len >= 2 && ((match_id[0] == '"' && match_id[len-1] == '"') || (match_id[0] == '\'' && match_id[len-1] == '\'')); + if (!is_literal && !is_rule_name(rules, match_id)) { + // Check if already in list + bool exists = false; + for (uint64_t j = 0; j < terminal_count; j++) { + if (strcmp(terminals[j], match_id) == 0) { + exists = true; + break; + } + } + if (!exists) { + terminal_count++; + terminals = (char**)realloc(terminals, terminal_count * sizeof(char*)); + terminals[terminal_count - 1] = match_id; + } + } + } + } + } + + // Write imports + fprintf(output_file, "using System;\n"); + fprintf(output_file, "using System.Collections.Generic;\n"); + fprintf(output_file, "using %s;\n\n", slex_ns); + + // Open Namespace + fprintf(output_file, "namespace %s\n{\n", ns_name); + + // SyntaxId enum + fprintf(output_file, " public enum %sSyntaxId\n {\n", prefix); + fprintf(output_file, " Default = 0,\n"); + for (uint64_t i = 0; i < rules->syntax_id_count; i++) { + fprintf(output_file, " %s,\n", rules->syntax_ids[i]); + } + fprintf(output_file, " }\n\n"); + + // EnclosureType enum + fprintf(output_file, " public enum 
%sEnclosureType\n {\n", prefix); + fprintf(output_file, " Node,\n"); + fprintf(output_file, " Segment\n"); + fprintf(output_file, " }\n\n"); + + // SyntaxNodeEnclosure class + fprintf(output_file, " public class %sSyntaxNodeEnclosure\n {\n", prefix); + fprintf(output_file, " public %sEnclosureType EnclosureType;\n", prefix); + fprintf(output_file, " public %s? Node;\n", data_type); + fprintf(output_file, " public %s? Segment;\n\n", slex_data_type); + + fprintf(output_file, " public %sSyntaxNodeEnclosure Clone()\n {\n", prefix); + fprintf(output_file, " return new %sSyntaxNodeEnclosure\n {\n", prefix); + fprintf(output_file, " EnclosureType = this.EnclosureType,\n"); + fprintf(output_file, " Node = this.Node?.Clone(),\n"); + fprintf(output_file, " Segment = this.Segment != null ? CloneSegment(this.Segment) : null\n"); + fprintf(output_file, " };\n"); + fprintf(output_file, " }\n\n"); + + fprintf(output_file, " private static %s CloneSegment(%s src)\n {\n", slex_data_type, slex_data_type); + fprintf(output_file, " return new %s\n {\n", slex_data_type); + fprintf(output_file, " Content = src.Content,\n"); + fprintf(output_file, " FileName = src.FileName,\n"); + fprintf(output_file, " Line = src.Line,\n"); + fprintf(output_file, " Column = src.Column,\n"); + fprintf(output_file, " Tag = src.Tag,\n"); + fprintf(output_file, " Id = src.Id\n"); + fprintf(output_file, " };\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + + // SyntaxNode class + fprintf(output_file, " public class %s\n {\n", data_type); + fprintf(output_file, " public %s? 
Parent;\n", data_type); + fprintf(output_file, " public string SyntaxName = string.Empty;\n"); + fprintf(output_file, " public %sSyntaxId Id;\n", prefix); + fprintf(output_file, " public List<%sSyntaxNodeEnclosure> Children = new List<%sSyntaxNodeEnclosure>();\n\n", prefix, prefix); + + fprintf(output_file, " public %s Clone()\n {\n", data_type); + fprintf(output_file, " var dst = new %s\n {\n", data_type); + fprintf(output_file, " SyntaxName = this.SyntaxName,\n"); + fprintf(output_file, " Id = this.Id\n"); + fprintf(output_file, " };\n"); + fprintf(output_file, " foreach (var child in this.Children)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " var clonedChild = child.Clone();\n"); + fprintf(output_file, " dst.Children.Add(clonedChild);\n"); + fprintf(output_file, " if (clonedChild.EnclosureType == %sEnclosureType.Node && clonedChild.Node != null)\n", prefix); + fprintf(output_file, " {\n"); + fprintf(output_file, " clonedChild.Node.Parent = dst;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " return dst;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + + // SCC class + fprintf(output_file, " public class %s\n {\n", class_name); + + // Inner Rule structures for representation + fprintf(output_file, " private class MatchingRuleDef\n {\n"); + fprintf(output_file, " public string TargetSyntaxId = string.Empty;\n"); + fprintf(output_file, " public string[] MatchIds = new string[0];\n"); + fprintf(output_file, " public string[] UsingMatchIds = new string[0];\n"); + fprintf(output_file, " }\n\n"); + + fprintf(output_file, " private class RuleDef\n {\n"); + fprintf(output_file, " public string NodeTypeTypeName = string.Empty;\n"); + fprintf(output_file, " public MatchingRuleDef[] Matchings = new MatchingRuleDef[0];\n"); + fprintf(output_file, " }\n\n"); + + // RULES definition + fprintf(output_file, " private static readonly RuleDef[] RULES = new RuleDef[]\n {\n"); + for (uint64_t r = 
0; r < rules->rule_count; r++) { + scc_rule* rule = &rules->rules[r]; + fprintf(output_file, " new RuleDef\n {\n"); + fprintf(output_file, " NodeTypeTypeName = "); + print_safe_c_string(output_file, rule->node_type_name); + fprintf(output_file, ",\n"); + fprintf(output_file, " Matchings = new MatchingRuleDef[]\n {\n"); + for (uint64_t m = 0; m < rule->matching_count; m++) { + scc_matching* matching = rule->matchings[m]; + fprintf(output_file, " new MatchingRuleDef\n {\n"); + fprintf(output_file, " TargetSyntaxId = "); + print_safe_c_string(output_file, matching->target_syntax_id ? matching->target_syntax_id : "skip"); + fprintf(output_file, ",\n"); + + fprintf(output_file, " MatchIds = new string[] { "); + for (uint64_t i = 0; i < matching->match_id_count; i++) { + print_safe_c_string(output_file, matching->match_ids[i]); + if (i < matching->match_id_count - 1) fprintf(output_file, ", "); + } + fprintf(output_file, " },\n"); + + fprintf(output_file, " UsingMatchIds = new string[] { "); + for (uint64_t i = 0; i < matching->using_match_id_count; i++) { + print_safe_c_string(output_file, matching->using_match_id[i]); + if (i < matching->using_match_id_count - 1) fprintf(output_file, ", "); + } + fprintf(output_file, " }\n"); + + fprintf(output_file, " }"); + if (m < rule->matching_count - 1) fprintf(output_file, ",\n"); + else fprintf(output_file, "\n"); + } + fprintf(output_file, " }\n"); + fprintf(output_file, " }"); + if (r < rules->rule_count - 1) fprintf(output_file, ",\n"); + else fprintf(output_file, "\n"); + } + fprintf(output_file, " };\n\n"); + + // MatchElement helper + fprintf(output_file, " private static bool MatchElement(%sSyntaxNodeEnclosure enc, string pattern)\n {\n", prefix); + fprintf(output_file, " if (enc == null || pattern == null) return false;\n"); + fprintf(output_file, " if (enc.EnclosureType == %sEnclosureType.Node)\n", prefix); + fprintf(output_file, " {\n"); + fprintf(output_file, " return enc.Node != null && enc.Node.SyntaxName == 
pattern;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " else\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " if (enc.Segment == null) return false;\n"); + fprintf(output_file, " if (pattern.Length >= 2 && ((pattern[0] == '\"' && pattern[pattern.Length - 1] == '\"') || (pattern[0] == '\\'' && pattern[pattern.Length - 1] == '\\'')))\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " string stripped = pattern.Substring(1, pattern.Length - 2);\n"); + fprintf(output_file, " return enc.Segment.Content == stripped;\n"); + fprintf(output_file, " }\n"); + for (uint64_t i = 0; i < terminal_count; i++) { + fprintf(output_file, " if (pattern == \"%s\")\n", terminals[i]); + fprintf(output_file, " {\n"); + fprintf(output_file, " return enc.Segment.Tag == %sTag.%s || enc.Segment.Id == %sId.%s;\n", slex_data_type, terminals[i], slex_data_type, terminals[i]); + fprintf(output_file, " }\n"); + } + fprintf(output_file, " return false;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + + if (terminals) free(terminals); + + // GetSyntaxId helper + fprintf(output_file, " private static %sSyntaxId GetSyntaxId(string name)\n {\n", prefix); + fprintf(output_file, " if (Enum.TryParse<%sSyntaxId>(name, out var id))\n", prefix); + fprintf(output_file, " {\n"); + fprintf(output_file, " return id;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " return %sSyntaxId.Default;\n", prefix); + fprintf(output_file, " }\n\n"); + + // Public Parse method + fprintf(output_file, " public bool Parse(%s head, out %s? 
root)\n {\n", slex_data_type, data_type); + fprintf(output_file, " root = null;\n"); + fprintf(output_file, " if (head == null) return false;\n"); + fprintf(output_file, " var stack = new List<%sSyntaxNodeEnclosure>();\n", prefix); + fprintf(output_file, " return ParseStep(stack, head, out root, 0);\n"); + fprintf(output_file, " }\n\n"); + + // ParseStep recursive method + fprintf(output_file, " private static bool ParseStep(List<%sSyntaxNodeEnclosure> stack, %s? input, out %s? result, int depth)\n {\n", prefix, slex_data_type, data_type); + fprintf(output_file, " result = null;\n"); + fprintf(output_file, " if (depth > 2000) return false;\n\n"); + fprintf(output_file, " if (input == null)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " if (stack.Count == 1 && stack[0].EnclosureType == %sEnclosureType.Node)\n", prefix); + fprintf(output_file, " {\n"); + fprintf(output_file, " var node = stack[0].Node;\n"); + fprintf(output_file, " if (node != null && node.SyntaxName == RULES[0].NodeTypeTypeName)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " result = node;\n"); + fprintf(output_file, " return true;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + + // Try Reductions + fprintf(output_file, " for (int r = 0; r < RULES.Length; r++)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " var rule = RULES[r];\n"); + fprintf(output_file, " for (int m = 0; m < rule.Matchings.Length; m++)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " var match = rule.Matchings[m];\n"); + fprintf(output_file, " if (stack.Count >= match.MatchIds.Length)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " bool matchOk = true;\n"); + fprintf(output_file, " int stackOffset = stack.Count - match.MatchIds.Length;\n"); + fprintf(output_file, " for (int i = 0; i < match.MatchIds.Length; i++)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " if 
(!MatchElement(stack[stackOffset + i], match.MatchIds[i]))\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " matchOk = false;\n"); + fprintf(output_file, " break;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + + fprintf(output_file, " if (matchOk)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " var newStack = new List<%sSyntaxNodeEnclosure>();\n", prefix); + fprintf(output_file, " for (int i = 0; i < stackOffset; i++)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " newStack.Add(stack[i]);\n"); + fprintf(output_file, " }\n\n"); + + fprintf(output_file, " bool skipPush = false;\n"); + fprintf(output_file, " %sSyntaxNodeEnclosure? reducedEnc = null;\n\n", prefix); + + fprintf(output_file, " if (match.TargetSyntaxId == \"skip\")\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " skipPush = true;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " else if (match.TargetSyntaxId == \"append_as_child\")\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " if (match.MatchIds.Length >= 1 && stack[stackOffset].EnclosureType == %sEnclosureType.Node && stack[stackOffset].Node != null)\n", prefix); + fprintf(output_file, " {\n"); + fprintf(output_file, " var parentNode = stack[stackOffset].Node;\n"); + fprintf(output_file, " var clonedParent = parentNode.Clone();\n\n"); + fprintf(output_file, " if (match.UsingMatchIds.Length >= 1)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " int childIdx = int.Parse(match.UsingMatchIds[0].Substring(1));\n"); + fprintf(output_file, " var childEnc = stack[stackOffset + childIdx].Clone();\n"); + fprintf(output_file, " clonedParent.Children.Add(childEnc);\n"); + fprintf(output_file, " if (childEnc.EnclosureType == %sEnclosureType.Node && childEnc.Node != null)\n", prefix); + fprintf(output_file, " {\n"); + fprintf(output_file, " childEnc.Node.Parent = clonedParent;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + 
fprintf(output_file, " reducedEnc = new %sSyntaxNodeEnclosure\n", prefix); + fprintf(output_file, " {\n"); + fprintf(output_file, " EnclosureType = %sEnclosureType.Node,\n", prefix); + fprintf(output_file, " Node = clonedParent\n"); + fprintf(output_file, " };\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " else\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " matchOk = false;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " else\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " var newNode = new %s\n", data_type); + fprintf(output_file, " {\n"); + fprintf(output_file, " Id = GetSyntaxId(match.TargetSyntaxId),\n"); + fprintf(output_file, " SyntaxName = rule.NodeTypeTypeName\n"); + fprintf(output_file, " };\n\n"); + fprintf(output_file, " for (int i = 0; i < match.UsingMatchIds.Length; i++)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " int idx = int.Parse(match.UsingMatchIds[i].Substring(1));\n"); + fprintf(output_file, " var childEnc = stack[stackOffset + idx].Clone();\n"); + fprintf(output_file, " newNode.Children.Add(childEnc);\n"); + fprintf(output_file, " if (childEnc.EnclosureType == %sEnclosureType.Node && childEnc.Node != null)\n", prefix); + fprintf(output_file, " {\n"); + fprintf(output_file, " childEnc.Node.Parent = newNode;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + fprintf(output_file, " reducedEnc = new %sSyntaxNodeEnclosure\n", prefix); + fprintf(output_file, " {\n"); + fprintf(output_file, " EnclosureType = %sEnclosureType.Node,\n", prefix); + fprintf(output_file, " Node = newNode\n"); + fprintf(output_file, " };\n"); + fprintf(output_file, " }\n\n"); + + fprintf(output_file, " if (matchOk)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " if (!skipPush && reducedEnc != null)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " newStack.Add(reducedEnc);\n"); + fprintf(output_file, " }\n"); + 
fprintf(output_file, " if (ParseStep(newStack, input, out result, depth + 1))\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " return true;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + + // Try Shift + fprintf(output_file, " if (input != null)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " var newStack = new List<%sSyntaxNodeEnclosure>(stack);\n", prefix); + fprintf(output_file, " newStack.Add(new %sSyntaxNodeEnclosure\n", prefix); + fprintf(output_file, " {\n"); + fprintf(output_file, " EnclosureType = %sEnclosureType.Segment,\n", prefix); + fprintf(output_file, " Segment = input\n"); + fprintf(output_file, " });\n\n"); + fprintf(output_file, " if (ParseStep(newStack, input.Next, out result, depth + 1))\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " return true;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + + fprintf(output_file, " return false;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + + // Close Namespace + fprintf(output_file, "}\n"); + + return true; } \ No newline at end of file diff --git a/Source/cli/main.c b/Source/cli/main.c index 7c806ee..b76e9a7 100644 --- a/Source/cli/main.c +++ b/Source/cli/main.c @@ -1,7 +1,210 @@ #include "../../Headers/scc_core.h" +#include +#include +#include + +static void print_usage() { + printf("SCC (Simple Compiler-Compiler) - Version 1.0\n"); + printf("Translates grammar rule files to target language parser source code.\n\n"); + printf("Usage: scc [options] \n\n"); + printf("Options:\n"); + printf(" -o Output file\n"); + printf(" -l Target language: c, c#, csharp (default: c)\n"); + printf(" -h
Output header file (C only)\n"); + printf(" -ns Specify namespace (C# only, default: SCCGenerated)\n"); + printf(" -class Specify class name (C# only, default: SCC)\n"); + printf(" -prefix Specify prefix for functions/types (default: scc_ for C, empty for C#)\n"); + printf(" -data_type Specify syntax node data type name\n"); + printf(" -slex-ns Specify namespace used in slex (default: SLexGenerated)\n"); + printf(" -slex-class Specify class name used in slex (default: SLex)\n"); + printf(" -slex-prefix Specify function prefix used in slex (default: slex_)\n"); + printf(" -slex-data_type Specify segment data type used in slex (default: Segment)\n"); + printf(" -slex-h
Specify header file slex generated (C only, default: slex_generated.h)\n"); +} int main(int ac, char **av) { + char* input_file = NULL; + char* output_file = NULL; + scc_options options; + options.target_language = c_language; + options.header_output = NULL; + options.namespace_name = NULL; + options.class_name = NULL; + options.prefix = NULL; + options.data_type_name = NULL; + options.slex_namespace_name = NULL; + options.slex_class_name = NULL; + options.slex_prefix = NULL; + options.slex_data_type_name = NULL; + options.slex_header = NULL; + + for (int i = 1; i < ac; i++) { + if (strcmp(av[i], "-o") == 0) { + if (i + 1 < ac) { + output_file = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -o option\n"); + return 1; + } + } else if (strcmp(av[i], "-l") == 0) { + if (i + 1 < ac) { + char* lang = av[++i]; + if (strcmp(lang, "c") == 0) { + options.target_language = c_language; + } else if (strcmp(lang, "c#") == 0 || strcmp(lang, "csharp") == 0) { + options.target_language = csharp; + } else { + fprintf(stderr, "Error: Unsupported language '%s'\n", lang); + return 1; + } + } else { + fprintf(stderr, "Error: Missing value for -l option\n"); + return 1; + } + } else if (strcmp(av[i], "-h") == 0) { + if (i + 1 < ac) { + options.header_output = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -h option\n"); + return 1; + } + } else if (strcmp(av[i], "-ns") == 0) { + if (i + 1 < ac) { + options.namespace_name = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -ns option\n"); + return 1; + } + } else if (strcmp(av[i], "-class") == 0) { + if (i + 1 < ac) { + options.class_name = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -class option\n"); + return 1; + } + } else if (strcmp(av[i], "-prefix") == 0) { + if (i + 1 < ac) { + options.prefix = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -prefix option\n"); + return 1; + } + } else if (strcmp(av[i], "-data_type") == 0) { + if (i + 1 < ac) { + 
options.data_type_name = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -data_type option\n"); + return 1; + } + } else if (strcmp(av[i], "-slex-ns") == 0) { + if (i + 1 < ac) { + options.slex_namespace_name = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -slex-ns option\n"); + return 1; + } + } else if (strcmp(av[i], "-slex-class") == 0) { + if (i + 1 < ac) { + options.slex_class_name = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -slex-class option\n"); + return 1; + } + } else if (strcmp(av[i], "-slex-prefix") == 0) { + if (i + 1 < ac) { + options.slex_prefix = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -slex-prefix option\n"); + return 1; + } + } else if (strcmp(av[i], "-slex-data_type") == 0) { + if (i + 1 < ac) { + options.slex_data_type_name = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -slex-data_type option\n"); + return 1; + } + } else if (strcmp(av[i], "-slex-h") == 0) { + if (i + 1 < ac) { + options.slex_header = av[++i]; + } else { + fprintf(stderr, "Error: Missing value for -slex-h option\n"); + return 1; + } + } else if (av[i][0] == '-') { + fprintf(stderr, "Error: Unknown option '%s'\n", av[i]); + print_usage(); + return 1; + } else { + if (input_file == NULL) { + input_file = av[i]; + } else { + fprintf(stderr, "Error: Multiple input files specified ('%s' and '%s')\n", input_file, av[i]); + return 1; + } + } + } + + if (!input_file) { + fprintf(stderr, "Error: Missing input file\n"); + print_usage(); + return 1; + } + + if (!output_file) { + fprintf(stderr, "Error: Missing output file (-o )\n"); + print_usage(); + return 1; + } + + // Set C/C# defaults if not provided + if (options.target_language == c_language) { + if (!options.prefix) options.prefix = "scc_"; + if (!options.data_type_name) options.data_type_name = "scc_syntax_node"; + if (!options.slex_prefix) options.slex_prefix = "slex_"; + if (!options.slex_data_type_name) options.slex_data_type_name 
= "slex_segment"; + if (!options.slex_header) options.slex_header = "slex_generated.h"; + } else { + if (!options.namespace_name) options.namespace_name = "SCCGenerated"; + if (!options.class_name) options.class_name = "SCC"; + if (!options.data_type_name) options.data_type_name = "SyntaxNode"; + if (!options.slex_namespace_name) options.slex_namespace_name = "SLexGenerated"; + if (!options.slex_class_name) options.slex_class_name = "SLex"; + if (!options.slex_data_type_name) options.slex_data_type_name = "Segment"; + if (!options.prefix) options.prefix = ""; + } + + FILE* f = fopen(input_file, "r"); + if (!f) { + fprintf(stderr, "Error: Could not open input file '%s'\n", input_file); + return 1; + } + + scc_rules rules; + if (!scc_read_rule_from_file(f, &rules)) { + fprintf(stderr, "Error: Failed to parse rules from input file '%s'\n", input_file); + fclose(f); + return 1; + } + fclose(f); + + FILE* out = fopen(output_file, "w"); + if (!out) { + fprintf(stderr, "Error: Could not open output file '%s'\n", output_file); + return 1; + } + + bool success = scc_translate_to_file(&options, &rules, out); + fclose(out); + + if (!success) { + fprintf(stderr, "Error: Parser translation failed\n"); + remove(output_file); + return 1; + } + + printf("Success: Generated parser written to '%s'\n", output_file); return 0; } \ No newline at end of file diff --git a/Source/scc.c b/Source/scc.c index cc49b62..d01a54a 100644 --- a/Source/scc.c +++ b/Source/scc.c @@ -1,6 +1,11 @@ #include "../Headers/scc_core.h" bool scc_translate_to_file(scc_options *options, scc_rules *rules, FILE *output_file){ - //Stub for now. 
/*
 * Trim leading and trailing whitespace from str in place.
 *
 * The trailing whitespace is cut by writing a terminator into the buffer, and
 * the returned pointer points at the first non-whitespace character inside
 * the same buffer (it is not a new allocation). Returns NULL when str is NULL.
 */
static char *trim_whitespace(char *str)
{
    if (!str) {
        return NULL;
    }

    while (isspace((unsigned char)*str)) {
        str++;
    }
    if (*str == '\0') {
        return str;
    }

    char *end = str + strlen(str) - 1;
    while (end > str && isspace((unsigned char)*end)) {
        end--;
    }
    end[1] = '\0';
    return str;
}

/*
 * Copy the next line starting at *cursor into line_buf and advance *cursor
 * past it.
 *
 * A line ends at '\n', '\r', "\r\n" or the end of the string; the terminator
 * is consumed but not copied. At most max_len - 1 characters are copied, so a
 * longer line is returned in several pieces by consecutive calls.
 *
 * Returns false when the input is exhausted, when an argument is NULL, or when
 * max_len is too small to hold one character plus the terminator (with such a
 * buffer the cursor could never advance and the caller would loop forever).
 */
static bool get_next_line(char **cursor, char *line_buf, int max_len)
{
    if (!cursor || !*cursor || !line_buf || max_len < 2) {
        return false;
    }

    char *c = *cursor;
    if (*c == '\0') {
        return false;
    }

    int idx = 0;
    while (*c != '\0' && *c != '\n' && *c != '\r' && idx < max_len - 1) {
        line_buf[idx++] = *c++;
    }
    line_buf[idx] = '\0';

    /* Consume one line terminator: "\r", "\n" or "\r\n". */
    if (*c == '\r') {
        c++;
    }
    if (*c == '\n') {
        c++;
    }

    *cursor = c;
    return true;
}

/*
 * Split str into whitespace-separated tokens.
 *
 * A token that starts with '"' or '\'' runs up to and including the matching
 * closing quote (or to the end of the string when the quote is never closed),
 * so it may contain whitespace; the quote characters stay part of the token.
 *
 * On return *out_tokens is a heap array of *out_count heap strings owned by
 * the caller (free every element, then the array). When str is NULL, has no
 * tokens, or an allocation fails, *out_tokens is NULL and *out_count is 0 and
 * nothing is left allocated.
 */
static void split_tokens(const char *str, char ***out_tokens, uint64_t *out_count)
{
    char **tokens = NULL;
    uint64_t count = 0;

    *out_tokens = NULL;
    *out_count = 0;
    if (!str) {
        return;
    }

    const char *p = str;
    while (*p != '\0') {
        while (isspace((unsigned char)*p)) {
            p++;
        }
        if (*p == '\0') {
            break;
        }

        const char *start = p;
        if (*p == '"' || *p == '\'') {
            const char quote = *p++;
            while (*p != '\0' && *p != quote) {
                p++;
            }
            if (*p == quote) {
                p++;
            }
        } else {
            while (*p != '\0' && !isspace((unsigned char)*p)) {
                p++;
            }
        }

        size_t len = (size_t)(p - start);
        char *token = malloc(len + 1);
        if (!token) {
            goto fail;
        }
        memcpy(token, start, len);
        token[len] = '\0';

        /* Grow through a temporary so the old array survives a failed realloc. */
        char **grown = realloc(tokens, (size_t)(count + 1) * sizeof *grown);
        if (!grown) {
            free(token);
            goto fail;
        }
        tokens = grown;
        tokens[count++] = token;
    }

    *out_tokens = tokens;
    *out_count = count;
    return;

fail:
    for (uint64_t i = 0; i < count; i++) {
        free(tokens[i]);
    }
    free(tokens);
}
matching->using_match_id_count = 0; + + current_rule->matching_count++; + current_rule->matchings = (scc_matching**)realloc(current_rule->matchings, current_rule->matching_count * sizeof(scc_matching*)); + current_rule->matchings[current_rule->matching_count - 1] = matching; + } + } else if (strncmp(trimmed, "=>", 2) == 0) { + if (current_rule && current_rule->matching_count > 0) { + scc_matching* matching = current_rule->matchings[current_rule->matching_count - 1]; + char* op_part = trimmed + 2; + char** op_tokens = NULL; + uint64_t op_token_count = 0; + split_tokens(op_part, &op_tokens, &op_token_count); + + if (op_token_count > 0) { + if (strcmp(op_tokens[0], "new_node") == 0) { + if (op_token_count > 1) { + matching->target_syntax_id = strdup(op_tokens[1]); + if (op_token_count > 2) { + matching->using_match_id_count = op_token_count - 2; + matching->using_match_id = (char**)malloc(matching->using_match_id_count * sizeof(char*)); + for (uint64_t i = 0; i < matching->using_match_id_count; i++) { + matching->using_match_id[i] = strdup(op_tokens[2 + i]); + } + } + } + } else if (strcmp(op_tokens[0], "append_as_child") == 0) { + matching->target_syntax_id = strdup("append_as_child"); + if (op_token_count > 1) { + matching->using_match_id_count = op_token_count - 1; + matching->using_match_id = (char**)malloc(matching->using_match_id_count * sizeof(char*)); + for (uint64_t i = 0; i < matching->using_match_id_count; i++) { + matching->using_match_id[i] = strdup(op_tokens[1 + i]); + } + } + } else if (strcmp(op_tokens[0], "skip") == 0) { + matching->target_syntax_id = strdup("skip"); + } + } + + // Free op_tokens + for (uint64_t i = 0; i < op_token_count; i++) { + free(op_tokens[i]); + } + free(op_tokens); + } + } else { + // Defines a new rule (node_type_name) + output_rule->rule_count++; + output_rule->rules = (scc_rule*)realloc(output_rule->rules, output_rule->rule_count * sizeof(scc_rule)); + + current_rule = &output_rule->rules[output_rule->rule_count - 1]; + 
current_rule->node_type_name = strdup(trimmed); + current_rule->matchings = NULL; + current_rule->matching_count = 0; + } + } + } + + return true; +} + +bool scc_read_rule_from_file(FILE *f, scc_rules *output_rule) { + if (!f || !output_rule) return false; + + // Determine file size + fseek(f, 0, SEEK_END); + long size = ftell(f); + fseek(f, 0, SEEK_SET); + + char* content = (char*)malloc(size + 1); + if (!content) return false; + + size_t read_bytes = fread(content, 1, size, f); + content[read_bytes] = '\0'; + + bool success = scc_read_rule_from_cstr(content, output_rule); + free(content); + return success; +}