Finished the implementation with Antigravity.
This commit is contained in:
@@ -1,7 +1,320 @@
|
||||
#include "../../../Headers/slex_core.h"
|
||||
#include "../../../Headers/slex_regex.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Helper to check if a tag is mapped to an ID, and return it.
|
||||
static const char* get_mapped_id(slex_rules* rules, const char* tag) {
|
||||
for (uint64_t i = 0; i < rules->mapping_count; i++) {
|
||||
if (strcmp(rules->mappings[i].Tag, tag) == 0) {
|
||||
return rules->mappings[i].Id;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Returns true when mappings[index].Id does not occur at any earlier index,
// i.e. this is the first mapping that introduces that Id.
static bool mapping_id_is_first_seen(slex_rules* rules, uint64_t index) {
    uint64_t earlier = 0;
    while (earlier < index) {
        if (strcmp(rules->mappings[earlier].Id, rules->mappings[index].Id) == 0) {
            return false;
        }
        earlier++;
    }
    return true;
}

// Writes the public declarations of the generated lexer to `f`:
// the two standard includes, the tag enum (one constant per rule), the id
// enum (a default constant plus one constant per distinct mapping Id), the
// segment struct, the post-process result enum, and the four function
// prototypes.
//
// f:       destination stream.
// options: supplies the identifier prefix and the segment type name; when
//          either is NULL, "slex_" and "slex_segment" are used respectively.
// rules:   supplies the rules (for tag constants) and mappings (for id constants).
static void generate_declarations(FILE* f, slex_options* options, slex_rules* rules) {
    // Fixed members of the segment struct that need no substitution.
    static const char* const plain_fields[] = {
        " char* head;\n",
        " int64_t length;\n",
        " char* file_name;\n",
        " int64_t line;\n",
        " int64_t col;\n",
    };

    char* prefix = options->prefix;
    if (!prefix) {
        prefix = "slex_";
    }
    char* data_type = options->data_type_name;
    if (!data_type) {
        data_type = "slex_segment";
    }

    fputs("#include <stdint.h>\n", f);
    fputs("#include <stdio.h>\n\n", f);

    // Tag enum: one constant per rule, in rule order.
    fprintf(f, "typedef enum %ssegment_tag {\n", prefix);
    uint64_t rule_idx = 0;
    while (rule_idx < rules->rule_count) {
        fprintf(f, " %stag_%s,\n", prefix, rules->rules[rule_idx].Tag);
        rule_idx++;
    }
    fprintf(f, "} %ssegment_tag;\n\n", prefix);

    // Id enum: the default constant first, then each distinct mapping Id in
    // order of first appearance.
    fprintf(f, "typedef enum %ssegment_id {\n", prefix);
    fprintf(f, " %sid_default = 0,\n", prefix);
    for (uint64_t map_idx = 0; map_idx < rules->mapping_count; map_idx++) {
        if (!mapping_id_is_first_seen(rules, map_idx)) {
            continue;
        }
        fprintf(f, " %sid_%s,\n", prefix, rules->mappings[map_idx].Id);
    }
    fprintf(f, "} %ssegment_id;\n\n", prefix);

    // Segment struct: a doubly linked list node describing one token.
    fprintf(f, "typedef struct %s {\n", data_type);
    for (size_t k = 0; k < sizeof plain_fields / sizeof plain_fields[0]; k++) {
        fputs(plain_fields[k], f);
    }
    fprintf(f, " %ssegment_tag tag;\n", prefix);
    fprintf(f, " %ssegment_id id;\n", prefix);
    fprintf(f, " struct %s* prev;\n", data_type);
    fprintf(f, " struct %s* next;\n", data_type);
    fprintf(f, "} %s;\n\n", data_type);

    // Result codes returned by the post-process hook.
    fprintf(f, "typedef enum %spost_process_result {\n", prefix);
    fprintf(f, " %scontinue,\n", prefix);
    fprintf(f, " %sskip,\n", prefix);
    fprintf(f, " %scontinue_with_output,\n", prefix);
    fprintf(f, "} %spost_process_result;\n\n", prefix);

    // Prototypes of the generated functions.
    fprintf(f, "%spost_process_result %spost_process(%s* input, %s** output);\n",
            prefix, prefix, data_type, data_type);
    fprintf(f, "char %sfile(FILE* f, char* file_name, %s** head);\n", prefix, data_type);
    fprintf(f, "char %scstr(char* input, char* file_name, %s** head);\n", prefix, data_type);
    fprintf(f, "char %sfree(%s* head);\n\n", prefix, data_type);
}
|
||||
|
||||
bool slex_translate_to_file_c(slex_options *options, slex_rules *rules, FILE *output_file)
|
||||
{
|
||||
// TODO: Stub for moment.
|
||||
return false;
|
||||
if (!options || !rules || !output_file) return false;
|
||||
|
||||
char* prefix = options->prefix ? options->prefix : "slex_";
|
||||
char* data_type = options->data_type_name ? options->data_type_name : "slex_segment";
|
||||
|
||||
// 1. Compile regexes to DFA
|
||||
char** patterns = (char**)malloc(rules->rule_count * sizeof(char*));
|
||||
for (uint64_t i = 0; i < rules->rule_count; i++) {
|
||||
patterns[i] = rules->rules[i].Pattern;
|
||||
}
|
||||
int dfa_state_count = 0;
|
||||
DFAState* dfa = slex_compile_regexes(patterns, (int)rules->rule_count, &dfa_state_count);
|
||||
free(patterns);
|
||||
|
||||
if (!dfa) return false;
|
||||
|
||||
// 2. Open header file if requested
|
||||
FILE* header_f = NULL;
|
||||
char* header_base = NULL;
|
||||
if (options->header_output && strlen(options->header_output) > 0) {
|
||||
header_f = fopen(options->header_output, "w");
|
||||
if (!header_f) {
|
||||
slex_free_dfa(dfa, dfa_state_count);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find basename of header_output to include it in the implementation
|
||||
char* last_slash = strrchr(options->header_output, '/');
|
||||
char* last_backslash = strrchr(options->header_output, '\\');
|
||||
char* base = options->header_output;
|
||||
if (last_slash && last_slash > base) base = last_slash + 1;
|
||||
if (last_backslash && last_backslash > base) base = last_backslash + 1;
|
||||
header_base = strdup(base);
|
||||
|
||||
// Write header guard
|
||||
fprintf(header_f, "#ifndef __SLEX_GENERATED_H__\n");
|
||||
fprintf(header_f, "#define __SLEX_GENERATED_H__\n\n");
|
||||
generate_declarations(header_f, options, rules);
|
||||
fprintf(header_f, "#endif\n");
|
||||
fclose(header_f);
|
||||
}
|
||||
|
||||
// 3. Write implementation to output_file
|
||||
fprintf(output_file, "#include <stdio.h>\n");
|
||||
fprintf(output_file, "#include <stdlib.h>\n");
|
||||
fprintf(output_file, "#include <string.h>\n");
|
||||
fprintf(output_file, "#include <stdbool.h>\n\n");
|
||||
|
||||
if (header_base) {
|
||||
fprintf(output_file, "#include \"%s\"\n\n", header_base);
|
||||
free(header_base);
|
||||
} else {
|
||||
// Output inline declarations
|
||||
generate_declarations(output_file, options, rules);
|
||||
}
|
||||
|
||||
// Extract variables and post-processor code
|
||||
char* variables = "";
|
||||
char* post_processor = "";
|
||||
for (uint64_t i = 0; i < rules->code_block_count; i++) {
|
||||
if (rules->code_blocks[i].target_languge == c_language) {
|
||||
if (rules->code_blocks[i].variables) {
|
||||
variables = rules->code_blocks[i].variables;
|
||||
}
|
||||
if (rules->code_blocks[i].post_processor_code) {
|
||||
post_processor = rules->code_blocks[i].post_processor_code;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Write variables
|
||||
if (strlen(variables) > 0) {
|
||||
fprintf(output_file, "/* --- User Variables --- */\n%s\n/* ---------------------- */\n\n", variables);
|
||||
}
|
||||
|
||||
// Write slex_post_process function
|
||||
fprintf(output_file, "%spost_process_result %spost_process(%s* input, %s** output) {\n", prefix, prefix, data_type, data_type);
|
||||
if (strlen(post_processor) > 0) {
|
||||
fprintf(output_file, "%s\n", post_processor);
|
||||
} else {
|
||||
// Default implementation
|
||||
fprintf(output_file, " *output = input;\n");
|
||||
fprintf(output_file, " return %scontinue;\n", prefix);
|
||||
}
|
||||
fprintf(output_file, "}\n\n");
|
||||
|
||||
// Write transition table
|
||||
fprintf(output_file, "static const int transitions[%d][256] = {\n", dfa_state_count);
|
||||
for (int i = 0; i < dfa_state_count; i++) {
|
||||
fprintf(output_file, " {");
|
||||
for (int c = 0; c < 256; c++) {
|
||||
fprintf(output_file, "%d", dfa[i].transitions[c]);
|
||||
if (c < 255) fprintf(output_file, ", ");
|
||||
}
|
||||
fprintf(output_file, "}");
|
||||
if (i < dfa_state_count - 1) fprintf(output_file, ",\n");
|
||||
else fprintf(output_file, "\n");
|
||||
}
|
||||
fprintf(output_file, "};\n\n");
|
||||
|
||||
// Write accepting rules table
|
||||
fprintf(output_file, "static const int accepting_rules[%d] = {\n ", dfa_state_count);
|
||||
for (int i = 0; i < dfa_state_count; i++) {
|
||||
fprintf(output_file, "%d", dfa[i].accept_rule_index);
|
||||
if (i < dfa_state_count - 1) fprintf(output_file, ", ");
|
||||
}
|
||||
fprintf(output_file, "\n};\n\n");
|
||||
|
||||
// Write mapping mappings table/resolver
|
||||
fprintf(output_file, "static void assign_tag_and_id(%s* node, int rule_idx) {\n", data_type);
|
||||
fprintf(output_file, " switch(rule_idx) {\n");
|
||||
for (uint64_t i = 0; i < rules->rule_count; i++) {
|
||||
fprintf(output_file, " case %d:\n", (int)i);
|
||||
fprintf(output_file, " node->tag = %stag_%s;\n", prefix, rules->rules[i].Tag);
|
||||
const char* mapped_id = get_mapped_id(rules, rules->rules[i].Tag);
|
||||
if (mapped_id) {
|
||||
fprintf(output_file, " node->id = %sid_%s;\n", prefix, mapped_id);
|
||||
} else {
|
||||
fprintf(output_file, " node->id = %sid_default;\n", prefix);
|
||||
}
|
||||
fprintf(output_file, " break;\n");
|
||||
}
|
||||
fprintf(output_file, " }\n");
|
||||
fprintf(output_file, "}\n\n");
|
||||
|
||||
// Write slex_free function
|
||||
fprintf(output_file, "char %sfree(%s* head) {\n", prefix, data_type);
|
||||
fprintf(output_file, " while (head) {\n");
|
||||
fprintf(output_file, " %s* next = head->next;\n", data_type);
|
||||
fprintf(output_file, " free(head->head);\n");
|
||||
fprintf(output_file, " if (head->file_name) free(head->file_name);\n");
|
||||
fprintf(output_file, " free(head);\n");
|
||||
fprintf(output_file, " head = next;\n");
|
||||
fprintf(output_file, " }\n");
|
||||
fprintf(output_file, " return 1;\n");
|
||||
fprintf(output_file, "}\n\n");
|
||||
|
||||
// Write slex_cstr function (DFA matching loop)
|
||||
fprintf(output_file, "char %scstr(char* input, char* file_name, %s** head) {\n", prefix, data_type);
|
||||
fprintf(output_file, " if (!input || !head) return 0;\n");
|
||||
fprintf(output_file, " *head = NULL;\n");
|
||||
fprintf(output_file, " %s* tail = NULL;\n", data_type);
|
||||
fprintf(output_file, " char* p = input;\n");
|
||||
fprintf(output_file, " int64_t current_line = 1;\n");
|
||||
fprintf(output_file, " int64_t current_col = 1;\n\n");
|
||||
fprintf(output_file, " while (*p != '\\0') {\n");
|
||||
fprintf(output_file, " int state = 0;\n");
|
||||
fprintf(output_file, " char* match_end = NULL;\n");
|
||||
fprintf(output_file, " int match_rule = -1;\n");
|
||||
fprintf(output_file, " int64_t token_line = current_line;\n");
|
||||
fprintf(output_file, " int64_t token_col = current_col;\n\n");
|
||||
fprintf(output_file, " char* curr_p = p;\n");
|
||||
fprintf(output_file, " while (*curr_p != '\\0') {\n");
|
||||
fprintf(output_file, " unsigned char c = (unsigned char)*curr_p;\n");
|
||||
fprintf(output_file, " int next_state = transitions[state][c];\n");
|
||||
fprintf(output_file, " if (next_state == -1) break;\n");
|
||||
fprintf(output_file, " state = next_state;\n");
|
||||
fprintf(output_file, " if (accepting_rules[state] != -1) {\n");
|
||||
fprintf(output_file, " match_end = curr_p + 1;\n");
|
||||
fprintf(output_file, " match_rule = accepting_rules[state];\n");
|
||||
fprintf(output_file, " }\n");
|
||||
fprintf(output_file, " curr_p++;\n");
|
||||
fprintf(output_file, " }\n\n");
|
||||
fprintf(output_file, " if (match_rule != -1 && match_end > p) {\n");
|
||||
fprintf(output_file, " int64_t len = match_end - p;\n");
|
||||
fprintf(output_file, " %s* node = (%s*)malloc(sizeof(%s));\n", data_type, data_type, data_type);
|
||||
fprintf(output_file, " node->head = (char*)malloc(len + 1);\n");
|
||||
fprintf(output_file, " memcpy(node->head, p, len);\n");
|
||||
fprintf(output_file, " node->head[len] = '\\0';\n");
|
||||
fprintf(output_file, " node->length = len;\n");
|
||||
fprintf(output_file, " node->file_name = file_name ? strdup(file_name) : NULL;\n");
|
||||
fprintf(output_file, " node->line = token_line;\n");
|
||||
fprintf(output_file, " node->col = token_col;\n");
|
||||
fprintf(output_file, " node->prev = NULL;\n");
|
||||
fprintf(output_file, " node->next = NULL;\n");
|
||||
fprintf(output_file, " assign_tag_and_id(node, match_rule);\n\n");
|
||||
fprintf(output_file, " // Update line/col tracker\n");
|
||||
fprintf(output_file, " for (char* t = p; t < match_end; t++) {\n");
|
||||
fprintf(output_file, " if (*t == '\\n') {\n");
|
||||
fprintf(output_file, " current_line++;\n");
|
||||
fprintf(output_file, " current_col = 1;\n");
|
||||
fprintf(output_file, " } else {\n");
|
||||
fprintf(output_file, " current_col++;\n");
|
||||
fprintf(output_file, " }\n");
|
||||
fprintf(output_file, " }\n\n");
|
||||
fprintf(output_file, " %s* output_node = NULL;\n", data_type);
|
||||
fprintf(output_file, " %spost_process_result pr = %spost_process(node, &output_node);\n", prefix, prefix);
|
||||
fprintf(output_file, " if (pr == %sskip) {\n", prefix);
|
||||
fprintf(output_file, " free(node->head);\n");
|
||||
fprintf(output_file, " if (node->file_name) free(node->file_name);\n");
|
||||
fprintf(output_file, " free(node);\n");
|
||||
fprintf(output_file, " } else {\n");
|
||||
fprintf(output_file, " %s* to_append = (pr == %scontinue_with_output) ? output_node : node;\n", data_type, prefix);
|
||||
fprintf(output_file, " if (to_append) {\n");
|
||||
fprintf(output_file, " if (!*head) {\n");
|
||||
fprintf(output_file, " *head = to_append;\n");
|
||||
fprintf(output_file, " tail = to_append;\n");
|
||||
fprintf(output_file, " } else {\n");
|
||||
fprintf(output_file, " tail->next = to_append;\n");
|
||||
fprintf(output_file, " to_append->prev = tail;\n");
|
||||
fprintf(output_file, " tail = to_append;\n");
|
||||
fprintf(output_file, " }\n");
|
||||
fprintf(output_file, " }\n");
|
||||
fprintf(output_file, " }\n");
|
||||
fprintf(output_file, " p = match_end;\n");
|
||||
fprintf(output_file, " } else if (*p == ' ' || *p == '\\t' || *p == '\\r' || *p == '\\n') {\n");
|
||||
fprintf(output_file, " if (*p == '\\n') {\n");
|
||||
fprintf(output_file, " current_line++;\n");
|
||||
fprintf(output_file, " current_col = 1;\n");
|
||||
fprintf(output_file, " } else {\n");
|
||||
fprintf(output_file, " current_col++;\n");
|
||||
fprintf(output_file, " }\n");
|
||||
fprintf(output_file, " p++;\n");
|
||||
fprintf(output_file, " } else {\n");
|
||||
fprintf(output_file, " fprintf(stderr, \"Lexical error at %%s:%%lld:%%lld near '%%c'\\n\", file_name ? file_name : \"<input>\", (long long)token_line, (long long)token_col, *p);\n");
|
||||
fprintf(output_file, " %sfree(*head);\n", prefix);
|
||||
fprintf(output_file, " *head = NULL;\n");
|
||||
fprintf(output_file, " return 0;\n");
|
||||
fprintf(output_file, " }\n");
|
||||
fprintf(output_file, " }\n");
|
||||
fprintf(output_file, " return 1;\n");
|
||||
fprintf(output_file, "}\n\n");
|
||||
|
||||
// Write slex_file function
|
||||
fprintf(output_file, "char %sfile(FILE* f, char* file_name, %s** head) {\n", prefix, data_type);
|
||||
fprintf(output_file, " if (!f || !head) return 0;\n");
|
||||
fprintf(output_file, " fseek(f, 0, SEEK_END);\n");
|
||||
fprintf(output_file, " long size = ftell(f);\n");
|
||||
fprintf(output_file, " fseek(f, 0, SEEK_SET);\n\n");
|
||||
fprintf(output_file, " char* buf = (char*)malloc(size + 1);\n");
|
||||
fprintf(output_file, " if (!buf) return 0;\n");
|
||||
fprintf(output_file, " size_t read_bytes = fread(buf, 1, size, f);\n");
|
||||
fprintf(output_file, " buf[read_bytes] = '\\0';\n\n");
|
||||
fprintf(output_file, " char success = %scstr(buf, file_name, head);\n", prefix);
|
||||
fprintf(output_file, " free(buf);\n");
|
||||
fprintf(output_file, " return success;\n");
|
||||
fprintf(output_file, "}\n");
|
||||
|
||||
slex_free_dfa(dfa, dfa_state_count);
|
||||
return true;
|
||||
}
|
||||
Reference in New Issue
Block a user