#include "../../../Headers/scc_core.h"
/*
 * NOTE(review): the standard header names were stripped from this file as
 * received (only bare "#include" tokens remained). The set below covers every
 * library name used in this translation unit -- confirm against the original.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
 * Identifier fragments interpolated into the generated C code, resolved once
 * from scc_options (with defaults) and shared by every emit helper.
 */
typedef struct scc_c_names {
    const char *prefix;         /* prefix of generated scc identifiers */
    const char *data_type;      /* name of the generated syntax-node struct */
    const char *slex_prefix;    /* prefix of the lexer's tag_/id_ constants */
    const char *slex_data_type; /* name of the lexer's segment struct */
} scc_c_names;

/**
 * Tell whether `name` equals the node_type_name of any rule in `rules`.
 */
static bool is_rule_name(scc_rules *rules, const char *name)
{
    for (uint64_t i = 0; i < rules->rule_count; i++) {
        if (strcmp(rules->rules[i].node_type_name, name) == 0) {
            return true;
        }
    }
    return false;
}

/**
 * Tell whether `name` equals any entry of rules->syntax_ids.
 *
 * Not called by the code in this file as visible; retained because other
 * code may depend on it.
 */
static bool is_syntax_id(scc_rules *rules, const char *name)
{
    for (uint64_t i = 0; i < rules->syntax_id_count; i++) {
        if (strcmp(rules->syntax_ids[i], name) == 0) {
            return true;
        }
    }
    return false;
}

/**
 * Write `str` to `f` as a double-quoted C string literal.
 *
 * Double quotes and backslashes are escaped. Newline, carriage return, tab
 * and any other control byte are escaped as well, so the emitted literal can
 * never be broken across lines or left unterminated. Bytes >= 0x80 are
 * written through unchanged.
 */
static void print_safe_c_string(FILE *f, const char *str)
{
    fputc('"', f);
    for (size_t i = 0; str[i] != '\0'; i++) {
        unsigned char c = (unsigned char)str[i];
        switch (c) {
        case '"':
            fputs("\\\"", f);
            break;
        case '\\':
            fputs("\\\\", f);
            break;
        case '\n':
            fputs("\\n", f);
            break;
        case '\r':
            fputs("\\r", f);
            break;
        case '\t':
            fputs("\\t", f);
            break;
        default:
            if (c < 0x20 || c == 0x7f) {
                /* Exactly three octal digits: a following digit character
                 * cannot be absorbed into the escape. */
                fprintf(f, "\\%03o", (unsigned)c);
            } else {
                fputc(c, f);
            }
            break;
        }
    }
    fputc('"', f);
}

/**
 * Tell whether `s` is wrapped in matching double or single quotes.
 */
static bool is_quoted_literal(const char *s)
{
    size_t len = strlen(s);
    if (len < 2) {
        return false;
    }
    return (s[0] == '"' && s[len - 1] == '"') ||
           (s[0] == '\'' && s[len - 1] == '\'');
}

/**
 * Return the part of `path` after its last '/' or '\\' separator, or `path`
 * itself when it contains neither. The result points into `path`.
 */
static const char *path_basename(const char *path)
{
    const char *slash = strrchr(path, '/');
    const char *backslash = strrchr(path, '\\');
    const char *sep = slash;

    if (backslash && (!sep || backslash > sep)) {
        sep = backslash;
    }
    return sep ? sep + 1 : path;
}

/**
 * Emit the type and function declarations of the generated parser to `f`:
 * the syntax-id enum, the node-type enum, the node / enclosure structs, the
 * rule-table structs and the prototypes of <prefix>parse / <prefix>free.
 *
 * Names fall back to "scc_", "scc_syntax_node" and "slex_segment" when the
 * corresponding option is NULL.
 */
static void generate_declarations(FILE *f, scc_options *options, scc_rules *rules)
{
    const char *prefix = options->prefix ? options->prefix : "scc_";
    const char *data_type = options->data_type_name ? options->data_type_name : "scc_syntax_node";
    const char *slex_data_type = options->slex_data_type_name ? options->slex_data_type_name : "slex_segment";

    /*
     * NOTE(review): the header names in these emitted directives were
     * stripped from the source as received (it emitted a bare "#include").
     * The declarations below need uint64_t; the other two are a best guess --
     * confirm against the original.
     */
    fputs("#include <stdint.h>\n"
          "#include <stdbool.h>\n"
          "#include <stddef.h>\n\n", f);

    /* Forward declare slex segment */
    fprintf(f, "struct %s;\n", slex_data_type);
    fprintf(f, "typedef struct %s %s;\n\n", slex_data_type, slex_data_type);

    /* syntax_id enum */
    fprintf(f, "typedef enum %ssyntax_id {\n", prefix);
    fprintf(f, " %sid_default = 0,\n", prefix);
    for (uint64_t i = 0; i < rules->syntax_id_count; i++) {
        fprintf(f, " %sid_%s,\n", prefix, rules->syntax_ids[i]);
    }
    fprintf(f, "} %ssyntax_id;\n\n", prefix);

    /* syntax_node_type enum */
    fprintf(f, "typedef enum %ssyntax_node_type {\n", prefix);
    fprintf(f, " %snode,\n", prefix);
    fprintf(f, " %ssegment\n", prefix);
    fprintf(f, "} %ssyntax_node_type;\n\n", prefix);

    /* syntax_node_enclosure forward declaration */
    fprintf(f, "struct %ssyntax_node_enclosure;\n\n", prefix);

    /* syntax node struct */
    fprintf(f, "typedef struct %s {\n", data_type);
    fprintf(f, " enum %ssyntax_id id;\n", prefix);
    fputs(" char* syntax_name;\n", f);
    fprintf(f, " struct %ssyntax_node_enclosure* children;\n", prefix);
    fputs(" uint64_t child_count;\n", f);
    fprintf(f, " struct %s* parent;\n", data_type);
    fprintf(f, "} %s;\n\n", data_type);

    /* syntax_node_enclosure struct */
    fprintf(f, "typedef struct %ssyntax_node_enclosure {\n", prefix);
    fprintf(f, " enum %ssyntax_node_type type;\n", prefix);
    fputs(" void* data;\n", f);
    fprintf(f, "} %ssyntax_node_enclosure;\n\n", prefix);

    /* Rule-table structures */
    fprintf(f, "typedef struct %smatching_rule_def {\n", prefix);
    fputs(" const char* target_syntax_id;\n"
          " const char** match_ids;\n"
          " uint64_t match_id_count;\n"
          " const char** using_match_ids;\n"
          " uint64_t using_match_id_count;\n", f);
    fprintf(f, "} %smatching_rule_def;\n\n", prefix);

    fprintf(f, "typedef struct %srule_def {\n", prefix);
    fputs(" const char* node_type_name;\n", f);
    fprintf(f, " const struct %smatching_rule_def* matchings;\n", prefix);
    fputs(" uint64_t matching_count;\n", f);
    fprintf(f, "} %srule_def;\n\n", prefix);

    /* Function declarations */
    fprintf(f, "char %sparse(%s* head, %s** output);\n", prefix, slex_data_type, data_type);
    fprintf(f, "char %sfree(%s* root);\n\n", prefix, data_type);
}

/**
 * Collect every distinct match id that is neither a quoted literal nor a
 * rule name, in first-seen order.
 *
 * On success stores a malloc'd array in *out_terminals (NULL when empty; the
 * entries point into `rules` and are not copied) and its length in
 * *out_count; the caller frees the array. Returns false on allocation
 * failure, in which case nothing is left allocated.
 */
static bool collect_terminals(scc_rules *rules, const char ***out_terminals, uint64_t *out_count)
{
    const char **list = NULL;
    uint64_t count = 0;

    for (uint64_t r = 0; r < rules->rule_count; r++) {
        scc_rule *rule = &rules->rules[r];
        for (uint64_t m = 0; m < rule->matching_count; m++) {
            scc_matching *matching = rule->matchings[m];
            for (uint64_t i = 0; i < matching->match_id_count; i++) {
                const char *match_id = matching->match_ids[i];
                if (is_quoted_literal(match_id) || is_rule_name(rules, match_id)) {
                    continue;
                }

                bool seen = false;
                for (uint64_t j = 0; j < count; j++) {
                    if (strcmp(list[j], match_id) == 0) {
                        seen = true;
                        break;
                    }
                }
                if (seen) {
                    continue;
                }

                /* Grow through a temporary so a failed realloc neither leaks
                 * the old array nor leaves us writing through NULL. */
                const char **grown = realloc(list, (count + 1) * sizeof *grown);
                if (!grown) {
                    free(list);
                    return false;
                }
                list = grown;
                list[count++] = match_id;
            }
        }
    }

    *out_terminals = list;
    *out_count = count;
    return true;
}

/**
 * Create options->header_output and write the include-guarded declarations
 * into it. Returns false if the file cannot be opened, written or closed.
 */
static bool write_header_file(scc_options *options, scc_rules *rules)
{
    FILE *header_f = fopen(options->header_output, "w");
    if (!header_f) {
        return false;
    }

    fputs("#ifndef __SCC_GENERATED_H__\n"
          "#define __SCC_GENERATED_H__\n\n", header_f);
    generate_declarations(header_f, options, rules);
    fputs("#endif\n", header_f);

    bool ok = !ferror(header_f);
    /* fclose flushes; a failure here means the header is incomplete. */
    if (fclose(header_f) != 0) {
        ok = false;
    }
    return ok;
}

/**
 * Emit match_element(): compares one stack entry against a pattern. Nodes
 * compare by syntax_name; segments compare by quoted literal text or, for
 * each collected terminal, by the lexer's tag_/id_ constant of that name.
 */
static void emit_match_element(FILE *out, const scc_c_names *nm,
                               const char *const *terminals, uint64_t terminal_count)
{
    fprintf(out, "static bool match_element(const %ssyntax_node_enclosure* enc, const char* pattern) {\n", nm->prefix);
    fputs(" if (!enc || !pattern) return false;\n", out);
    fprintf(out, " if (enc->type == %snode) {\n", nm->prefix);
    fprintf(out, " %s* node = (%s*)enc->data;\n", nm->data_type, nm->data_type);
    fputs(" return strcmp(node->syntax_name, pattern) == 0;\n"
          " } else {\n", out);
    fprintf(out, " %s* segment = (%s*)enc->data;\n", nm->slex_data_type, nm->slex_data_type);
    fputs(" size_t len = strlen(pattern);\n"
          " if (len >= 2 && ((pattern[0] == '\"' && pattern[len-1] == '\"') || (pattern[0] == '\\'' && pattern[len-1] == '\\''))) {\n"
          " return (segment->length == (int64_t)(len - 2)) && (strncmp(segment->head, pattern + 1, len - 2) == 0);\n"
          " }\n", out);
    for (uint64_t i = 0; i < terminal_count; i++) {
        fprintf(out, " if (strcmp(pattern, \"%s\") == 0) {\n", terminals[i]);
        fprintf(out, " return segment->tag == %stag_%s || segment->id == %sid_%s;\n",
                nm->slex_prefix, terminals[i], nm->slex_prefix, terminals[i]);
        fputs(" }\n", out);
    }
    fputs(" return false;\n"
          " }\n"
          "}\n\n", out);
}

/**
 * Emit the static support functions of the generated parser, in this order:
 * get_syntax_id, clone_segment, free_segment, then clone_tree and free_tree
 * (forward-declared first).
 */
static void emit_support_functions(FILE *out, const scc_c_names *nm, scc_rules *rules)
{
    const char *prefix = nm->prefix;
    const char *dt = nm->data_type;
    const char *sdt = nm->slex_data_type;

    /* get_syntax_id */
    fprintf(out, "static enum %ssyntax_id get_syntax_id(const char* name) {\n", prefix);
    for (uint64_t i = 0; i < rules->syntax_id_count; i++) {
        fprintf(out, " if (strcmp(name, \"%s\") == 0) return %sid_%s;\n",
                rules->syntax_ids[i], prefix, rules->syntax_ids[i]);
    }
    fprintf(out, " return %sid_default;\n", prefix);
    fputs("}\n\n", out);

    /* clone_segment */
    fprintf(out, "static %s* clone_segment(const %s* src) {\n", sdt, sdt);
    fputs(" if (!src) return NULL;\n", out);
    fprintf(out, " %s* dst = (%s*)malloc(sizeof(%s));\n", sdt, sdt, sdt);
    fputs(" dst->head = src->head ? strdup(src->head) : NULL;\n"
          " dst->length = src->length;\n"
          " dst->file_name = src->file_name ? strdup(src->file_name) : NULL;\n"
          " dst->line = src->line;\n"
          " dst->col = src->col;\n"
          " dst->tag = src->tag;\n"
          " dst->id = src->id;\n"
          " dst->prev = NULL;\n"
          " dst->next = NULL;\n"
          " return dst;\n"
          "}\n\n", out);

    /* free_segment */
    fprintf(out, "static void free_segment(%s* seg) {\n", sdt);
    fputs(" if (!seg) return;\n"
          " if (seg->head) free(seg->head);\n"
          " if (seg->file_name) free(seg->file_name);\n"
          " free(seg);\n"
          "}\n\n", out);

    /* clone_tree / free_tree forward declarations */
    fprintf(out, "static %s* clone_tree(const %s* src);\n", dt, dt);
    fprintf(out, "static void free_tree(%s* node);\n\n", dt);

    /* clone_tree */
    fprintf(out, "static %s* clone_tree(const %s* src) {\n", dt, dt);
    fputs(" if (!src) return NULL;\n", out);
    fprintf(out, " %s* dst = (%s*)malloc(sizeof(%s));\n", dt, dt, dt);
    fputs(" dst->id = src->id;\n"
          " dst->syntax_name = src->syntax_name ? strdup(src->syntax_name) : NULL;\n"
          " dst->child_count = src->child_count;\n"
          " if (dst->child_count > 0) {\n", out);
    fprintf(out, " dst->children = (%ssyntax_node_enclosure*)malloc(dst->child_count * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix);
    fputs(" for (uint64_t i = 0; i < dst->child_count; i++) {\n"
          " dst->children[i].type = src->children[i].type;\n", out);
    fprintf(out, " if (src->children[i].type == %snode) {\n", prefix);
    fprintf(out, " dst->children[i].data = clone_tree((%s*)src->children[i].data);\n", dt);
    fprintf(out, " ((%s*)dst->children[i].data)->parent = dst;\n", dt);
    fputs(" } else {\n", out);
    fprintf(out, " dst->children[i].data = clone_segment((%s*)src->children[i].data);\n", sdt);
    fputs(" }\n"
          " }\n"
          " } else {\n"
          " dst->children = NULL;\n"
          " }\n"
          " dst->parent = NULL;\n"
          " return dst;\n"
          "}\n\n", out);

    /* free_tree */
    fprintf(out, "static void free_tree(%s* node) {\n", dt);
    fputs(" if (!node) return;\n"
          " if (node->syntax_name) free(node->syntax_name);\n"
          " for (uint64_t i = 0; i < node->child_count; i++) {\n", out);
    fprintf(out, " if (node->children[i].type == %snode) {\n", prefix);
    fprintf(out, " free_tree((%s*)node->children[i].data);\n", dt);
    fputs(" } else {\n", out);
    fprintf(out, " free_segment((%s*)node->children[i].data);\n", sdt);
    fputs(" }\n"
          " }\n"
          " if (node->children) free(node->children);\n"
          " free(node);\n"
          "}\n\n", out);
}

/**
 * Emit the static rule tables: per matching a match_ids_R_M and a
 * using_match_ids_R_M string array, per rule with matchings a matchings_R
 * array, and finally the RULES array indexing them all.
 */
static void emit_rule_tables(FILE *out, const scc_c_names *nm, scc_rules *rules)
{
    for (uint64_t r = 0; r < rules->rule_count; r++) {
        scc_rule *rule = &rules->rules[r];

        for (uint64_t m = 0; m < rule->matching_count; m++) {
            scc_matching *matching = rule->matchings[m];

            fprintf(out, "static const char* match_ids_%d_%d[] = {", (int)r, (int)m);
            for (uint64_t i = 0; i < matching->match_id_count; i++) {
                if (i > 0) {
                    fputs(", ", out);
                }
                print_safe_c_string(out, matching->match_ids[i]);
            }
            if (matching->match_id_count == 0) {
                /* "{}" is not valid ISO C before C23. The element count is
                 * stored separately in the matchings table, so this
                 * placeholder is never read. */
                fputs("NULL", out);
            }
            fputs("};\n", out);

            fprintf(out, "static const char* using_match_ids_%d_%d[] = {", (int)r, (int)m);
            for (uint64_t i = 0; i < matching->using_match_id_count; i++) {
                if (i > 0) {
                    fputs(", ", out);
                }
                print_safe_c_string(out, matching->using_match_id[i]);
            }
            if (matching->using_match_id_count == 0) {
                fputs("NULL", out); /* same placeholder as above */
            }
            fputs("};\n", out);
        }

        if (rule->matching_count > 0) {
            fprintf(out, "static const %smatching_rule_def matchings_%d[] = {\n", nm->prefix, (int)r);
            for (uint64_t m = 0; m < rule->matching_count; m++) {
                scc_matching *matching = rule->matchings[m];
                if (m > 0) {
                    fputs(",\n", out);
                }
                fputs(" { ", out);
                /* A matching without a target is emitted as "skip". */
                print_safe_c_string(out, matching->target_syntax_id ? matching->target_syntax_id : "skip");
                fprintf(out, ", match_ids_%d_%d, %d, using_match_ids_%d_%d, %d }",
                        (int)r, (int)m, (int)matching->match_id_count,
                        (int)r, (int)m, (int)matching->using_match_id_count);
            }
            fputs("\n};\n", out);
        }
    }

    /* RULES array */
    fprintf(out, "static const %srule_def RULES[] = {\n", nm->prefix);
    for (uint64_t r = 0; r < rules->rule_count; r++) {
        scc_rule *rule = &rules->rules[r];
        if (r > 0) {
            fputs(",\n", out);
        }
        fputs(" { ", out);
        print_safe_c_string(out, rule->node_type_name);
        if (rule->matching_count > 0) {
            fprintf(out, ", matchings_%d, %d }", (int)r, (int)rule->matching_count);
        } else {
            fputs(", NULL, 0 }", out);
        }
    }
    fputs("\n};\n\n", out);
}

/**
 * Emit parse_step(): a recursive function that accepts when the input is
 * exhausted and the stack holds exactly one node named after the first rule,
 * otherwise tries every rule reduction against the top of the stack and then
 * a shift of the next input segment. Recursion is cut off past depth 2000.
 *
 * Requires rules->rule_count >= 1 (the first rule names the accepted root).
 */
static void emit_parse_step(FILE *out, const scc_c_names *nm, scc_rules *rules)
{
    const char *prefix = nm->prefix;
    const char *dt = nm->data_type;
    const char *sdt = nm->slex_data_type;

    fprintf(out, "static bool parse_step(%ssyntax_node_enclosure* stack, uint64_t stack_len, %s* input, %s** result, int depth) {\n", prefix, sdt, dt);
    fputs(" if (depth > 2000) return false;\n\n"
          " if (input == NULL) {\n", out);
    fprintf(out, " if (stack_len == 1 && stack[0].type == %snode) {\n", prefix);
    fprintf(out, " %s* node = (%s*)stack[0].data;\n", dt, dt);
    /* Escaped the same way as in the RULES table, whose strings become the
     * runtime syntax_name values this comparison is made against. */
    fputs(" if (strcmp(node->syntax_name, ", out);
    print_safe_c_string(out, rules->rules[0].node_type_name);
    fputs(") == 0) {\n", out);
    fputs(" *result = node;\n"
          " return true;\n"
          " }\n"
          " }\n"
          " }\n\n", out);

    /* Try reductions */
    fprintf(out, " for (uint64_t r = 0; r < %d; r++) {\n", (int)rules->rule_count);
    fprintf(out, " const %srule_def* rule = &RULES[r];\n", prefix);
    fputs(" for (uint64_t m = 0; m < rule->matching_count; m++) {\n", out);
    fprintf(out, " const %smatching_rule_def* match = &rule->matchings[m];\n", prefix);
    fputs(" if (stack_len >= match->match_id_count) {\n"
          " bool match_ok = true;\n"
          " uint64_t stack_offset = stack_len - match->match_id_count;\n"
          " for (uint64_t i = 0; i < match->match_id_count; i++) {\n"
          " if (!match_element(&stack[stack_offset + i], match->match_ids[i])) {\n"
          " match_ok = false;\n"
          " break;\n"
          " }\n"
          " }\n"
          " if (match_ok) {\n"
          " uint64_t new_stack_len = stack_offset;\n"
          " bool skip_push = false;\n", out);
    fprintf(out, " %ssyntax_node_enclosure reduced_enc;\n", prefix);
    fprintf(out, " reduced_enc.type = %snode;\n", prefix);
    fputs(" reduced_enc.data = NULL;\n\n"
          " if (strcmp(match->target_syntax_id, \"skip\") == 0) {\n"
          " skip_push = true;\n"
          " } else if (strcmp(match->target_syntax_id, \"append_as_child\") == 0) {\n", out);
    fprintf(out, " if (match->match_id_count >= 1 && stack[stack_offset].type == %snode) {\n", prefix);
    fprintf(out, " %s* parent_node = (%s*)stack[stack_offset].data;\n", dt, dt);
    fprintf(out, " %s* cloned_parent = clone_tree(parent_node);\n", dt);
    fputs(" if (match->using_match_id_count >= 1) {\n"
          " int child_idx = atoi(match->using_match_ids[0] + 1);\n", out);
    fprintf(out, " %ssyntax_node_enclosure child_enc = stack[stack_offset + child_idx];\n", prefix);
    fprintf(out, " if (child_enc.type == %snode) {\n", prefix);
    fprintf(out, " child_enc.data = clone_tree((%s*)child_enc.data);\n", dt);
    fputs(" } else {\n", out);
    fprintf(out, " child_enc.data = clone_segment((%s*)child_enc.data);\n", sdt);
    fputs(" }\n"
          " cloned_parent->child_count++;\n", out);
    fprintf(out, " cloned_parent->children = (%ssyntax_node_enclosure*)realloc(cloned_parent->children, cloned_parent->child_count * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix);
    fputs(" cloned_parent->children[cloned_parent->child_count - 1] = child_enc;\n", out);
    fprintf(out, " if (child_enc.type == %snode) {\n", prefix);
    fprintf(out, " ((%s*)child_enc.data)->parent = cloned_parent;\n", dt);
    fputs(" }\n"
          " }\n"
          " reduced_enc.data = cloned_parent;\n"
          " new_stack_len++;\n"
          " } else {\n"
          " match_ok = false;\n"
          " }\n"
          " } else {\n", out);
    fprintf(out, " %s* new_node = (%s*)malloc(sizeof(%s));\n", dt, dt, dt);
    fputs(" new_node->id = get_syntax_id(match->target_syntax_id);\n"
          " new_node->syntax_name = strdup(rule->node_type_name);\n"
          " new_node->child_count = match->using_match_id_count;\n", out);
    fprintf(out, " new_node->children = new_node->child_count > 0 ? (%ssyntax_node_enclosure*)malloc(new_node->child_count * sizeof(%ssyntax_node_enclosure)) : NULL;\n", prefix, prefix);
    fputs(" new_node->parent = NULL;\n"
          " for (uint64_t i = 0; i < match->using_match_id_count; i++) {\n"
          " int idx = atoi(match->using_match_ids[i] + 1);\n", out);
    fprintf(out, " %ssyntax_node_enclosure child_enc = stack[stack_offset + idx];\n", prefix);
    fprintf(out, " if (child_enc.type == %snode) {\n", prefix);
    fprintf(out, " child_enc.data = clone_tree((%s*)child_enc.data);\n", dt);
    fputs(" } else {\n", out);
    fprintf(out, " child_enc.data = clone_segment((%s*)child_enc.data);\n", sdt);
    fputs(" }\n"
          " new_node->children[i] = child_enc;\n", out);
    fprintf(out, " if (child_enc.type == %snode) {\n", prefix);
    fprintf(out, " ((%s*)child_enc.data)->parent = new_node;\n", dt);
    fputs(" }\n"
          " }\n"
          " reduced_enc.data = new_node;\n"
          " new_stack_len++;\n"
          " }\n\n"
          " if (match_ok) {\n", out);
    fprintf(out, " %ssyntax_node_enclosure* new_stack = (%ssyntax_node_enclosure*)malloc((new_stack_len + 1) * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix, prefix);
    fputs(" for (uint64_t i = 0; i < stack_offset; i++) {\n"
          " new_stack[i] = stack[i];\n"
          " }\n"
          " if (!skip_push) {\n"
          " new_stack[new_stack_len - 1] = reduced_enc;\n"
          " }\n"
          " if (parse_step(new_stack, new_stack_len, input, result, depth + 1)) {\n"
          " free(new_stack);\n"
          " return true;\n"
          " }\n"
          " if (!skip_push && reduced_enc.data) {\n", out);
    fprintf(out, " free_tree((%s*)reduced_enc.data);\n", dt);
    fputs(" }\n"
          " free(new_stack);\n"
          " }\n"
          " }\n"
          " }\n"
          " }\n"
          " }\n\n", out);

    /* Try shift */
    fputs(" if (input != NULL) {\n"
          " uint64_t new_stack_len = stack_len + 1;\n", out);
    fprintf(out, " %ssyntax_node_enclosure* new_stack = (%ssyntax_node_enclosure*)malloc(new_stack_len * sizeof(%ssyntax_node_enclosure));\n", prefix, prefix, prefix);
    fputs(" for (uint64_t i = 0; i < stack_len; i++) {\n"
          " new_stack[i] = stack[i];\n"
          " }\n", out);
    fprintf(out, " new_stack[stack_len].type = %ssegment;\n", prefix);
    fputs(" new_stack[stack_len].data = clone_segment(input);\n"
          " if (parse_step(new_stack, new_stack_len, input->next, result, depth + 1)) {\n"
          " free(new_stack);\n"
          " return true;\n"
          " }\n", out);
    fprintf(out, " free_segment((%s*)new_stack[stack_len].data);\n", sdt);
    fputs(" free(new_stack);\n"
          " }\n\n"
          " return false;\n"
          "}\n\n", out);
}

/**
 * Emit the two public entry points of the generated parser,
 * <prefix>parse and <prefix>free.
 */
static void emit_public_api(FILE *out, const scc_c_names *nm)
{
    fprintf(out, "char %sparse(%s* head, %s** output) {\n", nm->prefix, nm->slex_data_type, nm->data_type);
    fputs(" if (!head || !output) return 0;\n"
          " *output = NULL;\n", out);
    fprintf(out, " %ssyntax_node_enclosure* stack = NULL;\n", nm->prefix);
    fputs(" bool success = parse_step(stack, 0, head, output, 0);\n"
          " return success ? 1 : 0;\n"
          "}\n\n", out);

    fprintf(out, "char %sfree(%s* root) {\n", nm->prefix, nm->data_type);
    fputs(" if (!root) return 0;\n"
          " free_tree(root);\n"
          " return 1;\n"
          "}\n", out);
}

/**
 * Generate the C source of a parser for `rules` and write it to
 * `output_file`.
 *
 * When options->header_output is a non-empty path, the declarations are
 * written to that file and the implementation includes it by base name;
 * otherwise the declarations are emitted inline at the top of the
 * implementation. The implementation also includes options->slex_header, or
 * "slex_generated.h" when that option is NULL or empty.
 *
 * @param options     Generation options; NULL name fields fall back to
 *                    built-in defaults.
 * @param rules       Grammar rules; the first rule names the root node the
 *                    generated parser accepts, so at least one is required.
 * @param output_file Open, writable stream for the implementation. It is not
 *                    closed by this function.
 * @return true on success. false if an argument is NULL, `rules` contains no
 *         rule, memory runs out, the header file cannot be opened, written or
 *         closed, or `output_file` is in an error state after writing.
 */
bool scc_translate_to_file_c(scc_options *options, scc_rules *rules, FILE *output_file)
{
    if (!options || !rules || !output_file) {
        return false;
    }
    /* The generated parse_step compares against rules->rules[0]; with no
     * rules that read would be out of bounds. */
    if (rules->rule_count == 0) {
        return false;
    }

    scc_c_names names;
    names.prefix = options->prefix ? options->prefix : "scc_";
    names.data_type = options->data_type_name ? options->data_type_name : "scc_syntax_node";
    names.slex_prefix = options->slex_prefix ? options->slex_prefix : "slex_";
    names.slex_data_type = options->slex_data_type_name ? options->slex_data_type_name : "slex_segment";

    /* Collect terminals before any output so that an allocation failure
     * leaves neither file partially written. */
    const char **terminals = NULL;
    uint64_t terminal_count = 0;
    if (!collect_terminals(rules, &terminals, &terminal_count)) {
        return false;
    }

    /* 1. Write the header file if requested */
    const char *header_base = NULL;
    if (options->header_output && options->header_output[0] != '\0') {
        if (!write_header_file(options, rules)) {
            free(terminals);
            return false;
        }
        /* Points into options->header_output, which outlives this call. */
        header_base = path_basename(options->header_output);
    }

    /* 2. Write the implementation */
    /*
     * NOTE(review): the header names in these emitted directives were
     * stripped from the source as received. The generated code calls
     * malloc/realloc/free/atoi, strcmp/strncmp/strlen/strdup and uses bool;
     * this set covers those -- confirm against the original.
     */
    fputs("#include <stdio.h>\n"
          "#include <stdlib.h>\n"
          "#include <string.h>\n"
          "#include <stdbool.h>\n", output_file);

    if (options->slex_header && options->slex_header[0] != '\0') {
        fprintf(output_file, "#include \"%s\"\n\n", options->slex_header);
    } else {
        fputs("#include \"slex_generated.h\"\n\n", output_file);
    }

    if (header_base) {
        fprintf(output_file, "#include \"%s\"\n\n", header_base);
    } else {
        generate_declarations(output_file, options, rules);
    }

    emit_match_element(output_file, &names, terminals, terminal_count);
    free(terminals);

    emit_support_functions(output_file, &names, rules);
    emit_rule_tables(output_file, &names, rules);
    emit_parse_step(output_file, &names, rules);
    emit_public_api(output_file, &names);

    return !ferror(output_file);
}