Finished the implementation with Antigravity.
This commit is contained in:
@@ -0,0 +1 @@
|
|||||||
|
bin/
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
#ifndef SLEX_REGEX_H
|
||||||
|
#define SLEX_REGEX_H
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
// Represents an NFA state.
//
// States link to each other through up to two outgoing edges, so a state can
// reference itself or other states by pointer.
typedef struct NFAState {
    int id;                  // Identifier of this state
    bool is_epsilon;         // NOTE(review): presumably true when the edges are taken without consuming input - confirm
    bool char_set[256];      // One flag per byte value; presumably the bytes this state's transition accepts - confirm
    struct NFAState* edge1;  // First outgoing edge
    struct NFAState* edge2;  // Second outgoing edge
    int accept_rule_index;   // -1 if not accepting, >= 0 for rule index (highest priority is lowest index)
} NFAState;
|
||||||
|
|
||||||
|
// Represents an NFA fragment (start and accept states)
|
||||||
|
typedef struct NFAFragment {
|
||||||
|
NFAState* start;
|
||||||
|
NFAState* accept;
|
||||||
|
} NFAFragment;
|
||||||
|
|
||||||
|
// Represents a DFA state.
//
// The generators read `transitions` and `accept_rule_index` of every state to
// emit the lookup tables of the generated lexer.
typedef struct DFAState {
    int id;                 // Identifier of this state
    int* nfa_states;        // Sorted list of NFA state IDs that make up this DFA state
    int nfa_state_count;    // Number of entries in nfa_states
    int transitions[256];   // DFA state transitions for each character (-1 if no transition)
    int accept_rule_index;  // -1 if not accepting, >= 0 if accepting (stores rule index)
} DFAState;
|
||||||
|
|
||||||
|
// Compiles a set of regular expression patterns into a minimized/complete DFA
|
||||||
|
DFAState* slex_compile_regexes(char** patterns, int pattern_count, int* dfa_state_count_out);
|
||||||
|
|
||||||
|
// Frees all DFA states allocated by slex_compile_regexes
|
||||||
|
void slex_free_dfa(DFAState* dfa_states, int dfa_state_count);
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -1,7 +1,320 @@
|
|||||||
#include "../../../Headers/slex_core.h"
|
#include "../../../Headers/slex_core.h"
|
||||||
|
#include "../../../Headers/slex_regex.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
// Helper to check if a tag is mapped to an ID, and return it.
|
||||||
|
static const char* get_mapped_id(slex_rules* rules, const char* tag) {
|
||||||
|
for (uint64_t i = 0; i < rules->mapping_count; i++) {
|
||||||
|
if (strcmp(rules->mappings[i].Tag, tag) == 0) {
|
||||||
|
return rules->mappings[i].Id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writes the C declarations of the generated lexer to `f`: the tag enum (one
// enumerator per rule), the id enum (one enumerator per distinct mapping Id,
// after `<prefix>id_default`), the segment struct, the post-process result
// enum and the prototypes of the generated functions.
//
// The same text is used both for the separate header and, when no header is
// requested, inline at the top of the implementation file.
static void generate_declarations(FILE* f, slex_options* options, slex_rules* rules) {
    const char* prefix = options->prefix ? options->prefix : "slex_";
    const char* data_type = options->data_type_name ? options->data_type_name : "slex_segment";

    fprintf(f, "#include <stdint.h>\n");
    fprintf(f, "#include <stdio.h>\n\n");

    // slex_segment_tag enum
    fprintf(f, "typedef enum %ssegment_tag {\n", prefix);
    for (uint64_t i = 0; i < rules->rule_count; i++) {
        fprintf(f, " %stag_%s,\n", prefix, rules->rules[i].Tag);
    }
    fprintf(f, "} %ssegment_tag;\n\n", prefix);

    // slex_segment_id enum
    fprintf(f, "typedef enum %ssegment_id {\n", prefix);
    fprintf(f, " %sid_default = 0,\n", prefix);
    for (uint64_t i = 0; i < rules->mapping_count; i++) {
        const char* id = rules->mappings[i].Id;
        if (!id) {
            // A mapping without an Id has nothing to declare (and would be
            // undefined behaviour in strcmp/printf below).
            continue;
        }

        // Only print an Id the first time it appears in the mapping list.
        bool unique = true;
        for (uint64_t j = 0; j < i; j++) {
            const char* earlier = rules->mappings[j].Id;
            if (earlier && strcmp(earlier, id) == 0) {
                unique = false;
                break;
            }
        }
        if (unique) {
            fprintf(f, " %sid_%s,\n", prefix, id);
        }
    }
    fprintf(f, "} %ssegment_id;\n\n", prefix);

    // slex_segment struct
    fprintf(f, "typedef struct %s {\n", data_type);
    fprintf(f, " char* head;\n");
    fprintf(f, " int64_t length;\n");
    fprintf(f, " char* file_name;\n");
    fprintf(f, " int64_t line;\n");
    fprintf(f, " int64_t col;\n");
    fprintf(f, " %ssegment_tag tag;\n", prefix);
    fprintf(f, " %ssegment_id id;\n", prefix);
    fprintf(f, " struct %s* prev;\n", data_type);
    fprintf(f, " struct %s* next;\n", data_type);
    fprintf(f, "} %s;\n\n", data_type);

    // post_process_result enum
    fprintf(f, "typedef enum %spost_process_result {\n", prefix);
    fprintf(f, " %scontinue,\n", prefix);
    fprintf(f, " %sskip,\n", prefix);
    fprintf(f, " %scontinue_with_output,\n", prefix);
    fprintf(f, "} %spost_process_result;\n\n", prefix);

    // Function declarations
    fprintf(f, "%spost_process_result %spost_process(%s* input, %s** output);\n", prefix, prefix, data_type, data_type);
    fprintf(f, "char %sfile(FILE* f, char* file_name, %s** head);\n", prefix, data_type);
    fprintf(f, "char %scstr(char* input, char* file_name, %s** head);\n", prefix, data_type);
    fprintf(f, "char %sfree(%s* head);\n\n", prefix, data_type);
}
|
||||||
|
|
||||||
bool slex_translate_to_file_c(slex_options *options, slex_rules *rules, FILE *output_file)
|
bool slex_translate_to_file_c(slex_options *options, slex_rules *rules, FILE *output_file)
|
||||||
{
|
{
|
||||||
// TODO: Stub for moment.
|
if (!options || !rules || !output_file) return false;
|
||||||
return false;
|
|
||||||
|
char* prefix = options->prefix ? options->prefix : "slex_";
|
||||||
|
char* data_type = options->data_type_name ? options->data_type_name : "slex_segment";
|
||||||
|
|
||||||
|
// 1. Compile regexes to DFA
|
||||||
|
char** patterns = (char**)malloc(rules->rule_count * sizeof(char*));
|
||||||
|
for (uint64_t i = 0; i < rules->rule_count; i++) {
|
||||||
|
patterns[i] = rules->rules[i].Pattern;
|
||||||
|
}
|
||||||
|
int dfa_state_count = 0;
|
||||||
|
DFAState* dfa = slex_compile_regexes(patterns, (int)rules->rule_count, &dfa_state_count);
|
||||||
|
free(patterns);
|
||||||
|
|
||||||
|
if (!dfa) return false;
|
||||||
|
|
||||||
|
// 2. Open header file if requested
|
||||||
|
FILE* header_f = NULL;
|
||||||
|
char* header_base = NULL;
|
||||||
|
if (options->header_output && strlen(options->header_output) > 0) {
|
||||||
|
header_f = fopen(options->header_output, "w");
|
||||||
|
if (!header_f) {
|
||||||
|
slex_free_dfa(dfa, dfa_state_count);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find basename of header_output to include it in the implementation
|
||||||
|
char* last_slash = strrchr(options->header_output, '/');
|
||||||
|
char* last_backslash = strrchr(options->header_output, '\\');
|
||||||
|
char* base = options->header_output;
|
||||||
|
if (last_slash && last_slash > base) base = last_slash + 1;
|
||||||
|
if (last_backslash && last_backslash > base) base = last_backslash + 1;
|
||||||
|
header_base = strdup(base);
|
||||||
|
|
||||||
|
// Write header guard
|
||||||
|
fprintf(header_f, "#ifndef __SLEX_GENERATED_H__\n");
|
||||||
|
fprintf(header_f, "#define __SLEX_GENERATED_H__\n\n");
|
||||||
|
generate_declarations(header_f, options, rules);
|
||||||
|
fprintf(header_f, "#endif\n");
|
||||||
|
fclose(header_f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Write implementation to output_file
|
||||||
|
fprintf(output_file, "#include <stdio.h>\n");
|
||||||
|
fprintf(output_file, "#include <stdlib.h>\n");
|
||||||
|
fprintf(output_file, "#include <string.h>\n");
|
||||||
|
fprintf(output_file, "#include <stdbool.h>\n\n");
|
||||||
|
|
||||||
|
if (header_base) {
|
||||||
|
fprintf(output_file, "#include \"%s\"\n\n", header_base);
|
||||||
|
free(header_base);
|
||||||
|
} else {
|
||||||
|
// Output inline declarations
|
||||||
|
generate_declarations(output_file, options, rules);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract variables and post-processor code
|
||||||
|
char* variables = "";
|
||||||
|
char* post_processor = "";
|
||||||
|
for (uint64_t i = 0; i < rules->code_block_count; i++) {
|
||||||
|
if (rules->code_blocks[i].target_languge == c_language) {
|
||||||
|
if (rules->code_blocks[i].variables) {
|
||||||
|
variables = rules->code_blocks[i].variables;
|
||||||
|
}
|
||||||
|
if (rules->code_blocks[i].post_processor_code) {
|
||||||
|
post_processor = rules->code_blocks[i].post_processor_code;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write variables
|
||||||
|
if (strlen(variables) > 0) {
|
||||||
|
fprintf(output_file, "/* --- User Variables --- */\n%s\n/* ---------------------- */\n\n", variables);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write slex_post_process function
|
||||||
|
fprintf(output_file, "%spost_process_result %spost_process(%s* input, %s** output) {\n", prefix, prefix, data_type, data_type);
|
||||||
|
if (strlen(post_processor) > 0) {
|
||||||
|
fprintf(output_file, "%s\n", post_processor);
|
||||||
|
} else {
|
||||||
|
// Default implementation
|
||||||
|
fprintf(output_file, " *output = input;\n");
|
||||||
|
fprintf(output_file, " return %scontinue;\n", prefix);
|
||||||
|
}
|
||||||
|
fprintf(output_file, "}\n\n");
|
||||||
|
|
||||||
|
// Write transition table
|
||||||
|
fprintf(output_file, "static const int transitions[%d][256] = {\n", dfa_state_count);
|
||||||
|
for (int i = 0; i < dfa_state_count; i++) {
|
||||||
|
fprintf(output_file, " {");
|
||||||
|
for (int c = 0; c < 256; c++) {
|
||||||
|
fprintf(output_file, "%d", dfa[i].transitions[c]);
|
||||||
|
if (c < 255) fprintf(output_file, ", ");
|
||||||
|
}
|
||||||
|
fprintf(output_file, "}");
|
||||||
|
if (i < dfa_state_count - 1) fprintf(output_file, ",\n");
|
||||||
|
else fprintf(output_file, "\n");
|
||||||
|
}
|
||||||
|
fprintf(output_file, "};\n\n");
|
||||||
|
|
||||||
|
// Write accepting rules table
|
||||||
|
fprintf(output_file, "static const int accepting_rules[%d] = {\n ", dfa_state_count);
|
||||||
|
for (int i = 0; i < dfa_state_count; i++) {
|
||||||
|
fprintf(output_file, "%d", dfa[i].accept_rule_index);
|
||||||
|
if (i < dfa_state_count - 1) fprintf(output_file, ", ");
|
||||||
|
}
|
||||||
|
fprintf(output_file, "\n};\n\n");
|
||||||
|
|
||||||
|
// Write mapping mappings table/resolver
|
||||||
|
fprintf(output_file, "static void assign_tag_and_id(%s* node, int rule_idx) {\n", data_type);
|
||||||
|
fprintf(output_file, " switch(rule_idx) {\n");
|
||||||
|
for (uint64_t i = 0; i < rules->rule_count; i++) {
|
||||||
|
fprintf(output_file, " case %d:\n", (int)i);
|
||||||
|
fprintf(output_file, " node->tag = %stag_%s;\n", prefix, rules->rules[i].Tag);
|
||||||
|
const char* mapped_id = get_mapped_id(rules, rules->rules[i].Tag);
|
||||||
|
if (mapped_id) {
|
||||||
|
fprintf(output_file, " node->id = %sid_%s;\n", prefix, mapped_id);
|
||||||
|
} else {
|
||||||
|
fprintf(output_file, " node->id = %sid_default;\n", prefix);
|
||||||
|
}
|
||||||
|
fprintf(output_file, " break;\n");
|
||||||
|
}
|
||||||
|
fprintf(output_file, " }\n");
|
||||||
|
fprintf(output_file, "}\n\n");
|
||||||
|
|
||||||
|
// Write slex_free function
|
||||||
|
fprintf(output_file, "char %sfree(%s* head) {\n", prefix, data_type);
|
||||||
|
fprintf(output_file, " while (head) {\n");
|
||||||
|
fprintf(output_file, " %s* next = head->next;\n", data_type);
|
||||||
|
fprintf(output_file, " free(head->head);\n");
|
||||||
|
fprintf(output_file, " if (head->file_name) free(head->file_name);\n");
|
||||||
|
fprintf(output_file, " free(head);\n");
|
||||||
|
fprintf(output_file, " head = next;\n");
|
||||||
|
fprintf(output_file, " }\n");
|
||||||
|
fprintf(output_file, " return 1;\n");
|
||||||
|
fprintf(output_file, "}\n\n");
|
||||||
|
|
||||||
|
// Write slex_cstr function (DFA matching loop)
|
||||||
|
fprintf(output_file, "char %scstr(char* input, char* file_name, %s** head) {\n", prefix, data_type);
|
||||||
|
fprintf(output_file, " if (!input || !head) return 0;\n");
|
||||||
|
fprintf(output_file, " *head = NULL;\n");
|
||||||
|
fprintf(output_file, " %s* tail = NULL;\n", data_type);
|
||||||
|
fprintf(output_file, " char* p = input;\n");
|
||||||
|
fprintf(output_file, " int64_t current_line = 1;\n");
|
||||||
|
fprintf(output_file, " int64_t current_col = 1;\n\n");
|
||||||
|
fprintf(output_file, " while (*p != '\\0') {\n");
|
||||||
|
fprintf(output_file, " int state = 0;\n");
|
||||||
|
fprintf(output_file, " char* match_end = NULL;\n");
|
||||||
|
fprintf(output_file, " int match_rule = -1;\n");
|
||||||
|
fprintf(output_file, " int64_t token_line = current_line;\n");
|
||||||
|
fprintf(output_file, " int64_t token_col = current_col;\n\n");
|
||||||
|
fprintf(output_file, " char* curr_p = p;\n");
|
||||||
|
fprintf(output_file, " while (*curr_p != '\\0') {\n");
|
||||||
|
fprintf(output_file, " unsigned char c = (unsigned char)*curr_p;\n");
|
||||||
|
fprintf(output_file, " int next_state = transitions[state][c];\n");
|
||||||
|
fprintf(output_file, " if (next_state == -1) break;\n");
|
||||||
|
fprintf(output_file, " state = next_state;\n");
|
||||||
|
fprintf(output_file, " if (accepting_rules[state] != -1) {\n");
|
||||||
|
fprintf(output_file, " match_end = curr_p + 1;\n");
|
||||||
|
fprintf(output_file, " match_rule = accepting_rules[state];\n");
|
||||||
|
fprintf(output_file, " }\n");
|
||||||
|
fprintf(output_file, " curr_p++;\n");
|
||||||
|
fprintf(output_file, " }\n\n");
|
||||||
|
fprintf(output_file, " if (match_rule != -1 && match_end > p) {\n");
|
||||||
|
fprintf(output_file, " int64_t len = match_end - p;\n");
|
||||||
|
fprintf(output_file, " %s* node = (%s*)malloc(sizeof(%s));\n", data_type, data_type, data_type);
|
||||||
|
fprintf(output_file, " node->head = (char*)malloc(len + 1);\n");
|
||||||
|
fprintf(output_file, " memcpy(node->head, p, len);\n");
|
||||||
|
fprintf(output_file, " node->head[len] = '\\0';\n");
|
||||||
|
fprintf(output_file, " node->length = len;\n");
|
||||||
|
fprintf(output_file, " node->file_name = file_name ? strdup(file_name) : NULL;\n");
|
||||||
|
fprintf(output_file, " node->line = token_line;\n");
|
||||||
|
fprintf(output_file, " node->col = token_col;\n");
|
||||||
|
fprintf(output_file, " node->prev = NULL;\n");
|
||||||
|
fprintf(output_file, " node->next = NULL;\n");
|
||||||
|
fprintf(output_file, " assign_tag_and_id(node, match_rule);\n\n");
|
||||||
|
fprintf(output_file, " // Update line/col tracker\n");
|
||||||
|
fprintf(output_file, " for (char* t = p; t < match_end; t++) {\n");
|
||||||
|
fprintf(output_file, " if (*t == '\\n') {\n");
|
||||||
|
fprintf(output_file, " current_line++;\n");
|
||||||
|
fprintf(output_file, " current_col = 1;\n");
|
||||||
|
fprintf(output_file, " } else {\n");
|
||||||
|
fprintf(output_file, " current_col++;\n");
|
||||||
|
fprintf(output_file, " }\n");
|
||||||
|
fprintf(output_file, " }\n\n");
|
||||||
|
fprintf(output_file, " %s* output_node = NULL;\n", data_type);
|
||||||
|
fprintf(output_file, " %spost_process_result pr = %spost_process(node, &output_node);\n", prefix, prefix);
|
||||||
|
fprintf(output_file, " if (pr == %sskip) {\n", prefix);
|
||||||
|
fprintf(output_file, " free(node->head);\n");
|
||||||
|
fprintf(output_file, " if (node->file_name) free(node->file_name);\n");
|
||||||
|
fprintf(output_file, " free(node);\n");
|
||||||
|
fprintf(output_file, " } else {\n");
|
||||||
|
fprintf(output_file, " %s* to_append = (pr == %scontinue_with_output) ? output_node : node;\n", data_type, prefix);
|
||||||
|
fprintf(output_file, " if (to_append) {\n");
|
||||||
|
fprintf(output_file, " if (!*head) {\n");
|
||||||
|
fprintf(output_file, " *head = to_append;\n");
|
||||||
|
fprintf(output_file, " tail = to_append;\n");
|
||||||
|
fprintf(output_file, " } else {\n");
|
||||||
|
fprintf(output_file, " tail->next = to_append;\n");
|
||||||
|
fprintf(output_file, " to_append->prev = tail;\n");
|
||||||
|
fprintf(output_file, " tail = to_append;\n");
|
||||||
|
fprintf(output_file, " }\n");
|
||||||
|
fprintf(output_file, " }\n");
|
||||||
|
fprintf(output_file, " }\n");
|
||||||
|
fprintf(output_file, " p = match_end;\n");
|
||||||
|
fprintf(output_file, " } else if (*p == ' ' || *p == '\\t' || *p == '\\r' || *p == '\\n') {\n");
|
||||||
|
fprintf(output_file, " if (*p == '\\n') {\n");
|
||||||
|
fprintf(output_file, " current_line++;\n");
|
||||||
|
fprintf(output_file, " current_col = 1;\n");
|
||||||
|
fprintf(output_file, " } else {\n");
|
||||||
|
fprintf(output_file, " current_col++;\n");
|
||||||
|
fprintf(output_file, " }\n");
|
||||||
|
fprintf(output_file, " p++;\n");
|
||||||
|
fprintf(output_file, " } else {\n");
|
||||||
|
fprintf(output_file, " fprintf(stderr, \"Lexical error at %%s:%%lld:%%lld near '%%c'\\n\", file_name ? file_name : \"<input>\", (long long)token_line, (long long)token_col, *p);\n");
|
||||||
|
fprintf(output_file, " %sfree(*head);\n", prefix);
|
||||||
|
fprintf(output_file, " *head = NULL;\n");
|
||||||
|
fprintf(output_file, " return 0;\n");
|
||||||
|
fprintf(output_file, " }\n");
|
||||||
|
fprintf(output_file, " }\n");
|
||||||
|
fprintf(output_file, " return 1;\n");
|
||||||
|
fprintf(output_file, "}\n\n");
|
||||||
|
|
||||||
|
// Write slex_file function
|
||||||
|
fprintf(output_file, "char %sfile(FILE* f, char* file_name, %s** head) {\n", prefix, data_type);
|
||||||
|
fprintf(output_file, " if (!f || !head) return 0;\n");
|
||||||
|
fprintf(output_file, " fseek(f, 0, SEEK_END);\n");
|
||||||
|
fprintf(output_file, " long size = ftell(f);\n");
|
||||||
|
fprintf(output_file, " fseek(f, 0, SEEK_SET);\n\n");
|
||||||
|
fprintf(output_file, " char* buf = (char*)malloc(size + 1);\n");
|
||||||
|
fprintf(output_file, " if (!buf) return 0;\n");
|
||||||
|
fprintf(output_file, " size_t read_bytes = fread(buf, 1, size, f);\n");
|
||||||
|
fprintf(output_file, " buf[read_bytes] = '\\0';\n\n");
|
||||||
|
fprintf(output_file, " char success = %scstr(buf, file_name, head);\n", prefix);
|
||||||
|
fprintf(output_file, " free(buf);\n");
|
||||||
|
fprintf(output_file, " return success;\n");
|
||||||
|
fprintf(output_file, "}\n");
|
||||||
|
|
||||||
|
slex_free_dfa(dfa, dfa_state_count);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,259 @@
|
|||||||
#include "../../../Headers/slex_core.h"
|
#include "../../../Headers/slex_core.h"
|
||||||
|
#include "../../../Headers/slex_regex.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
// Helper to check if a tag is mapped to an ID, and return it.
|
||||||
|
static const char* get_mapped_id(slex_rules* rules, const char* tag) {
|
||||||
|
for (uint64_t i = 0; i < rules->mapping_count; i++) {
|
||||||
|
if (strcmp(rules->mappings[i].Tag, tag) == 0) {
|
||||||
|
return rules->mappings[i].Id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print a string as a safe C# verbatim string literal.
//
// Writes @"..." to `f`, doubling every '"' in `str`; all other bytes are
// copied unchanged. A NULL `str` is written as the empty literal @"".
static void print_verbatim_string(FILE* f, const char* str) {
    fputs("@\"", f);
    if (str) {
        for (const char* p = str; *p != '\0'; p++) {
            if (*p == '"') {
                fputs("\"\"", f);
            } else {
                fputc(*p, f);
            }
        }
    }
    fputc('"', f);
}
|
||||||
|
|
||||||
// Writes the generated C# data types: the tag enum (one member per rule), the
// id enum (Default plus one member per distinct mapping Id), the
// PostProcessResult enum and the segment class.
static void emit_cs_types(FILE* out, slex_rules* rules, const char* data_type) {
    // SegmentTag enum
    fprintf(out, " public enum %sTag\n {\n", data_type);
    for (uint64_t i = 0; i < rules->rule_count; i++) {
        fprintf(out, " %s,\n", rules->rules[i].Tag);
    }
    fprintf(out, " }\n\n");

    // SegmentId enum
    fprintf(out, " public enum %sId\n {\n", data_type);
    fprintf(out, " Default = 0,\n");
    for (uint64_t i = 0; i < rules->mapping_count; i++) {
        const char* id = rules->mappings[i].Id;
        if (!id) continue;  // nothing to declare; avoids strcmp(NULL, ...)

        // Emit each Id only the first time it appears.
        bool unique = true;
        for (uint64_t j = 0; j < i; j++) {
            const char* earlier = rules->mappings[j].Id;
            if (earlier && strcmp(earlier, id) == 0) {
                unique = false;
                break;
            }
        }
        if (unique) {
            fprintf(out, " %s,\n", id);
        }
    }
    fprintf(out, " }\n\n");

    // PostProcessResult enum
    fprintf(out, " public enum PostProcessResult\n {\n");
    fprintf(out, " Continue,\n");
    fprintf(out, " Skip,\n");
    fprintf(out, " ContinueWithOutput\n");
    fprintf(out, " }\n\n");

    // Segment class
    fprintf(out, " public class %s\n {\n", data_type);
    fprintf(out, " public string Content { get; set; } = string.Empty;\n");
    fprintf(out, " public string FileName { get; set; } = string.Empty;\n");
    fprintf(out, " public %s? Prev { get; set; }\n", data_type);
    fprintf(out, " public %s? Next { get; set; }\n", data_type);
    fprintf(out, " public long Line { get; set; }\n");
    fprintf(out, " public long Column { get; set; }\n");
    fprintf(out, " public %sTag Tag { get; set; }\n", data_type);
    fprintf(out, " public %sId Id { get; set; }\n", data_type);
    fprintf(out, " }\n\n");
}

// Writes the Rules array (one anchored Regex per rule) and AssignTagAndId().
static void emit_cs_rules(FILE* out, slex_rules* rules, const char* data_type) {
    // Each pattern is wrapped in a non-capturing group so that the \G anchor
    // applies to the whole pattern; without the group, "\G" + "a|b" anchors
    // only the first alternative and "b" could match further along the input.
    fprintf(out, " private static readonly Regex[] Rules = new Regex[] {\n");
    for (uint64_t i = 0; i < rules->rule_count; i++) {
        fprintf(out, " new Regex(@\"\\G(?:\" + ");
        print_verbatim_string(out, rules->rules[i].Pattern);
        fprintf(out, " + @\")\", RegexOptions.Compiled)");
        if (i + 1 < rules->rule_count) fprintf(out, ",\n");
        else fprintf(out, "\n");
    }
    fprintf(out, " };\n\n");

    // Tag and ID assignment
    fprintf(out, " private void AssignTagAndId(%s node, int ruleIdx)\n {\n", data_type);
    fprintf(out, " switch (ruleIdx)\n {\n");
    for (uint64_t i = 0; i < rules->rule_count; i++) {
        fprintf(out, " case %d:\n", (int)i);
        fprintf(out, " node.Tag = %sTag.%s;\n", data_type, rules->rules[i].Tag);
        const char* mapped_id = get_mapped_id(rules, rules->rules[i].Tag);
        if (mapped_id) {
            fprintf(out, " node.Id = %sId.%s;\n", data_type, mapped_id);
        } else {
            fprintf(out, " node.Id = %sId.Default;\n", data_type);
        }
        fprintf(out, " break;\n");
    }
    fprintf(out, " }\n");
    fprintf(out, " }\n\n");
}

// Writes the convenience SLex overloads (FileInfo, Stream, string) that all
// forward to the core (string, fileName) overload.
static void emit_cs_overloads(FILE* out, const char* prefix, const char* data_type) {
    // 1. FileInfo
    fprintf(out, " public bool %sSLex(FileInfo inputFile, out %s? Head)\n {\n", prefix, data_type);
    fprintf(out, " if (inputFile == null)\n {\n");
    fprintf(out, " Head = null;\n");
    fprintf(out, " return false;\n");
    fprintf(out, " }\n");
    fprintf(out, " using (var stream = inputFile.OpenRead())\n {\n");
    fprintf(out, " return %sSLex(stream, inputFile.FullName, out Head);\n", prefix);
    fprintf(out, " }\n");
    fprintf(out, " }\n\n");

    // 2. Stream
    fprintf(out, " public bool %sSLex(Stream inputStream, out %s? Head)\n {\n", prefix, data_type);
    fprintf(out, " return %sSLex(inputStream, string.Empty, out Head);\n", prefix);
    fprintf(out, " }\n\n");

    // helper Stream with filename
    fprintf(out, " public bool %sSLex(Stream inputStream, string fileName, out %s? Head)\n {\n", prefix, data_type);
    fprintf(out, " if (inputStream == null)\n {\n");
    fprintf(out, " Head = null;\n");
    fprintf(out, " return false;\n");
    fprintf(out, " }\n");
    fprintf(out, " using (var reader = new StreamReader(inputStream))\n {\n");
    fprintf(out, " return %sSLex(reader.ReadToEnd(), fileName, out Head);\n", prefix);
    fprintf(out, " }\n");
    fprintf(out, " }\n\n");

    // 3. string
    fprintf(out, " public bool %sSLex(string inputContent, out %s? Head)\n {\n", prefix, data_type);
    fprintf(out, " return %sSLex(inputContent, string.Empty, out Head);\n", prefix);
    fprintf(out, " }\n\n");
}

// Writes the core SLex(string, fileName) overload: at each position every rule
// is tried and the longest match wins (the earliest rule wins ties because a
// later rule must be strictly longer).
static void emit_cs_core_loop(FILE* out, const char* prefix, const char* data_type) {
    fprintf(out, " public bool %sSLex(string inputContent, string fileName, out %s? Head)\n {\n", prefix, data_type);
    fprintf(out, " Head = null;\n");
    fprintf(out, " if (inputContent == null) return false;\n\n");
    fprintf(out, " %s? head = null;\n", data_type);
    fprintf(out, " %s? tail = null;\n", data_type);
    fprintf(out, " int idx = 0;\n");
    fprintf(out, " int len = inputContent.Length;\n");
    fprintf(out, " long currentLine = 1;\n");
    fprintf(out, " long currentCol = 1;\n\n");
    fprintf(out, " while (idx < len)\n {\n");
    fprintf(out, " int bestRule = -1;\n");
    fprintf(out, " int bestLen = 0;\n");
    fprintf(out, " string bestVal = \"\";\n");
    fprintf(out, " long tokenLine = currentLine;\n");
    fprintf(out, " long tokenCol = currentCol;\n\n");
    fprintf(out, " for (int r = 0; r < Rules.Length; r++)\n");
    fprintf(out, " {\n");
    fprintf(out, " var m = Rules[r].Match(inputContent, idx);\n");
    fprintf(out, " if (m.Success && m.Length > bestLen)\n");
    fprintf(out, " {\n");
    fprintf(out, " bestLen = m.Length;\n");
    fprintf(out, " bestRule = r;\n");
    fprintf(out, " bestVal = m.Value;\n");
    fprintf(out, " }\n");
    fprintf(out, " }\n\n");
    fprintf(out, " if (bestRule != -1 && bestLen > 0)\n {\n");
    fprintf(out, " var node = new %s\n {\n", data_type);
    fprintf(out, " Content = bestVal,\n");
    fprintf(out, " FileName = fileName,\n");
    fprintf(out, " Line = tokenLine,\n");
    fprintf(out, " Column = tokenCol\n");
    fprintf(out, " };\n");
    fprintf(out, " AssignTagAndId(node, bestRule);\n\n");
    fprintf(out, " // Update line/col tracker\n");
    fprintf(out, " for (int t = idx; t < idx + bestLen; t++)\n {\n");
    fprintf(out, " if (inputContent[t] == '\\n')\n {\n");
    fprintf(out, " currentLine++;\n");
    fprintf(out, " currentCol = 1;\n");
    fprintf(out, " }\n");
    fprintf(out, " else\n {\n");
    fprintf(out, " currentCol++;\n");
    fprintf(out, " }\n");
    fprintf(out, " }\n\n");
    fprintf(out, " %s? outputNode;\n", data_type);
    fprintf(out, " PostProcessResult pr = slex_post_process(node, out outputNode);\n");
    fprintf(out, " if (pr != PostProcessResult.Skip)\n {\n");
    fprintf(out, " %s? toAppend = (pr == PostProcessResult.ContinueWithOutput) ? outputNode : node;\n", data_type);
    fprintf(out, " if (toAppend != null)\n {\n");
    fprintf(out, " if (head == null)\n {\n");
    fprintf(out, " head = toAppend;\n");
    fprintf(out, " tail = toAppend;\n");
    fprintf(out, " }\n");
    fprintf(out, " else\n {\n");
    fprintf(out, " tail.Next = toAppend;\n");
    fprintf(out, " toAppend.Prev = tail;\n");
    fprintf(out, " tail = toAppend;\n");
    fprintf(out, " }\n");
    fprintf(out, " }\n");
    fprintf(out, " }\n");
    fprintf(out, " idx += bestLen;\n");
    fprintf(out, " }\n");
    // Unmatched whitespace is skipped, matching the behaviour of the lexer
    // produced by the C generator.
    fprintf(out, " else if (inputContent[idx] == ' ' || inputContent[idx] == '\\t' || inputContent[idx] == '\\r' || inputContent[idx] == '\\n')\n {\n");
    fprintf(out, " if (inputContent[idx] == '\\n')\n {\n");
    fprintf(out, " currentLine++;\n");
    fprintf(out, " currentCol = 1;\n");
    fprintf(out, " }\n");
    fprintf(out, " else\n {\n");
    fprintf(out, " currentCol++;\n");
    fprintf(out, " }\n");
    fprintf(out, " idx++;\n");
    fprintf(out, " }\n");
    fprintf(out, " else\n {\n");
    fprintf(out, " Console.Error.WriteLine($\"Lexical error at {fileName}:{tokenLine}:{tokenCol} near '{inputContent[idx]}'\");\n");
    fprintf(out, " Head = null;\n");
    fprintf(out, " return false;\n");
    fprintf(out, " }\n");
    fprintf(out, " }\n\n");
    fprintf(out, " Head = head;\n");
    fprintf(out, " return true;\n");
    fprintf(out, " }\n");
}

// Generates a C# lexer for `rules` into `output_file`.
//
// The output is a single namespace containing the tag/id enums, the segment
// class and the lexer class. Unlike the C generator, no DFA is built here: the
// rule patterns are handed to System.Text.RegularExpressions.Regex, anchored
// at the current position with \G.
//
// Empty or missing namespace/class/data-type options fall back to
// "SLexGenerated", "SLexer" and "Segment"; a missing prefix means no prefix.
//
// Returns false when an argument is NULL or `output_file` is in an error state
// after writing; true otherwise.
bool slex_translate_to_file_csharp(slex_options *options, slex_rules *rules, FILE *output_file)
{
    if (!options || !rules || !output_file) return false;

    const char* ns_name = (options->namespace_name && options->namespace_name[0] != '\0') ? options->namespace_name : "SLexGenerated";
    const char* class_name = (options->class_name && options->class_name[0] != '\0') ? options->class_name : "SLexer";
    const char* data_type = (options->data_type_name && options->data_type_name[0] != '\0') ? options->data_type_name : "Segment";
    const char* prefix = options->prefix ? options->prefix : "";

    // Extract variables & post_processor from the first C# code block
    const char* variables = "";
    const char* post_processor = "";
    for (uint64_t i = 0; i < rules->code_block_count; i++) {
        if (rules->code_blocks[i].target_languge == csharp) {
            if (rules->code_blocks[i].variables) {
                variables = rules->code_blocks[i].variables;
            }
            if (rules->code_blocks[i].post_processor_code) {
                post_processor = rules->code_blocks[i].post_processor_code;
            }
            break;
        }
    }

    // Generate file content
    fprintf(output_file, "using System;\n");
    fprintf(output_file, "using System.IO;\n");
    fprintf(output_file, "using System.Text;\n");
    fprintf(output_file, "using System.Text.RegularExpressions;\n\n");

    fprintf(output_file, "namespace %s\n{\n", ns_name);

    emit_cs_types(output_file, rules, data_type);

    // SLexer class
    fprintf(output_file, " public class %s\n {\n", class_name);

    // User variables
    if (variables[0] != '\0') {
        fprintf(output_file, " // --- User Variables ---\n%s\n // ----------------------\n\n", variables);
    }

    // slex_post_process method
    fprintf(output_file, " private PostProcessResult slex_post_process(%s Input, out %s Output)\n {\n", data_type, data_type);
    if (post_processor[0] != '\0') {
        fprintf(output_file, "%s\n", post_processor);
    } else {
        fprintf(output_file, " Output = Input;\n");
        fprintf(output_file, " return PostProcessResult.Continue;\n");
    }
    fprintf(output_file, " }\n\n");

    emit_cs_rules(output_file, rules, data_type);
    emit_cs_overloads(output_file, prefix, data_type);
    emit_cs_core_loop(output_file, prefix, data_type);

    fprintf(output_file, " }\n");
    fprintf(output_file, "}\n");

    return !ferror(output_file);
}
|
||||||
@@ -1,6 +1,190 @@
|
|||||||
#include "../../Headers/slex_core.h"
|
#include "../../Headers/slex_core.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
// Writes the command-line synopsis followed by one line per supported
// option to stdout. `prog_name` is substituted into the synopsis line.
static void print_usage(const char* prog_name) {
    static const char* const option_lines[] = {
        " -o <output> Output file/output folder\n",
        " -l <language> Specify target language: c, c#, csharp (default: detected or c)\n",
        " -h <header> Output header file (separates declarations and implementation for C)\n",
        " -ns <namespace> Specify namespace (C# only. Default: SLexGenerated)\n",
        " -class <class_name> Specify class name (C# only. Default: SLexer)\n",
        " -prefix <prefix> Specify prefix for functions/methods\n",
        " -data_type <data_type> Specify data type name of the segment\n",
    };
    const size_t line_total = sizeof(option_lines) / sizeof(option_lines[0]);

    printf("Usage: %s [options] <input_file> [options]\n\n", prog_name);
    fputs("Options:\n", stdout);
    for (size_t row = 0; row < line_total; row++) {
        fputs(option_lines[row], stdout);
    }
}
|
||||||
|
|
||||||
int main(int ac, char **av)
|
int main(int ac, char **av)
|
||||||
{
|
{
|
||||||
|
char* input_path = NULL;
|
||||||
|
char* output_path = NULL;
|
||||||
|
char* header_path = NULL;
|
||||||
|
char* lang_str = NULL;
|
||||||
|
char* ns_name = NULL;
|
||||||
|
char* class_name = NULL;
|
||||||
|
char* prefix = NULL;
|
||||||
|
char* data_type_name = NULL;
|
||||||
|
|
||||||
|
for (int i = 1; i < ac; i++) {
|
||||||
|
if (av[i][0] == '-') {
|
||||||
|
if (strcmp(av[i], "-o") == 0) {
|
||||||
|
if (i + 1 < ac) output_path = av[++i];
|
||||||
|
else { fprintf(stderr, "Error: -o option requires an argument\n"); return 1; }
|
||||||
|
} else if (strcmp(av[i], "-l") == 0) {
|
||||||
|
if (i + 1 < ac) lang_str = av[++i];
|
||||||
|
else { fprintf(stderr, "Error: -l option requires an argument\n"); return 1; }
|
||||||
|
} else if (strcmp(av[i], "-h") == 0) {
|
||||||
|
if (i + 1 < ac) header_path = av[++i];
|
||||||
|
else { fprintf(stderr, "Error: -h option requires an argument\n"); return 1; }
|
||||||
|
} else if (strcmp(av[i], "-ns") == 0) {
|
||||||
|
if (i + 1 < ac) ns_name = av[++i];
|
||||||
|
else { fprintf(stderr, "Error: -ns option requires an argument\n"); return 1; }
|
||||||
|
} else if (strcmp(av[i], "-class") == 0) {
|
||||||
|
if (i + 1 < ac) class_name = av[++i];
|
||||||
|
else { fprintf(stderr, "Error: -class option requires an argument\n"); return 1; }
|
||||||
|
} else if (strcmp(av[i], "-prefix") == 0) {
|
||||||
|
if (i + 1 < ac) prefix = av[++i];
|
||||||
|
else { fprintf(stderr, "Error: -prefix option requires an argument\n"); return 1; }
|
||||||
|
} else if (strcmp(av[i], "-data_type") == 0) {
|
||||||
|
if (i + 1 < ac) data_type_name = av[++i];
|
||||||
|
else { fprintf(stderr, "Error: -data_type option requires an argument\n"); return 1; }
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Error: Unknown option %s\n", av[i]);
|
||||||
|
print_usage(av[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!input_path) {
|
||||||
|
input_path = av[i];
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Error: Multiple input files are not supported: %s\n", av[i]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!input_path) {
|
||||||
|
fprintf(stderr, "Error: No input file specified\n");
|
||||||
|
print_usage(av[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine target language
|
||||||
|
slex_target_language target_lang = c_language;
|
||||||
|
bool lang_detected = false;
|
||||||
|
|
||||||
|
if (lang_str) {
|
||||||
|
char temp_lang[256];
|
||||||
|
int l_len = (int)strlen(lang_str);
|
||||||
|
for (int k = 0; k < l_len && k < 255; k++) {
|
||||||
|
temp_lang[k] = (char)tolower((unsigned char)lang_str[k]);
|
||||||
|
}
|
||||||
|
temp_lang[l_len] = '\0';
|
||||||
|
|
||||||
|
if (strcmp(temp_lang, "c") == 0) {
|
||||||
|
target_lang = c_language;
|
||||||
|
lang_detected = true;
|
||||||
|
} else if (strcmp(temp_lang, "c#") == 0 || strcmp(temp_lang, "csharp") == 0) {
|
||||||
|
target_lang = csharp;
|
||||||
|
lang_detected = true;
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Error: Unsupported language %s\n", lang_str);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else if (output_path) {
|
||||||
|
// Guess language from output path extension
|
||||||
|
char* ext = strrchr(output_path, '.');
|
||||||
|
if (ext) {
|
||||||
|
if (strcmp(ext, ".cs") == 0) {
|
||||||
|
target_lang = csharp;
|
||||||
|
lang_detected = true;
|
||||||
|
} else if (strcmp(ext, ".c") == 0) {
|
||||||
|
target_lang = c_language;
|
||||||
|
lang_detected = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!lang_detected) {
|
||||||
|
// Default to C if not specified and not detected
|
||||||
|
target_lang = c_language;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize options with defaults
|
||||||
|
slex_options options;
|
||||||
|
memset(&options, 0, sizeof(options));
|
||||||
|
options.target_language = target_lang;
|
||||||
|
options.header_output = header_path;
|
||||||
|
|
||||||
|
if (target_lang == c_language) {
|
||||||
|
options.prefix = prefix ? prefix : "slex_";
|
||||||
|
options.data_type_name = data_type_name ? data_type_name : "slex_segment";
|
||||||
|
} else {
|
||||||
|
options.namespace_name = ns_name ? ns_name : "SLexGenerated";
|
||||||
|
options.class_name = class_name ? class_name : "SLexer";
|
||||||
|
options.prefix = prefix ? prefix : "";
|
||||||
|
options.data_type_name = data_type_name ? data_type_name : "Segment";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load input rule file
|
||||||
|
FILE* in_f = fopen(input_path, "r");
|
||||||
|
if (!in_f) {
|
||||||
|
fprintf(stderr, "Error: Failed to open input file %s\n", input_path);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
slex_rules rules;
|
||||||
|
memset(&rules, 0, sizeof(rules));
|
||||||
|
if (!slex_read_rule_from_file(in_f, &rules)) {
|
||||||
|
fprintf(stderr, "Error: Failed to parse rule file %s\n", input_path);
|
||||||
|
fclose(in_f);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
fclose(in_f);
|
||||||
|
|
||||||
|
// Open output file
|
||||||
|
FILE* out_f = stdout;
|
||||||
|
if (output_path && strlen(output_path) > 0) {
|
||||||
|
out_f = fopen(output_path, "w");
|
||||||
|
if (!out_f) {
|
||||||
|
fprintf(stderr, "Error: Failed to open output file %s\n", output_path);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform translation
|
||||||
|
bool success = slex_translate_to_file(&options, &rules, out_f);
|
||||||
|
|
||||||
|
if (out_f != stdout) {
|
||||||
|
fclose(out_f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Free parsed rule structures
|
||||||
|
for (uint64_t i = 0; i < rules.rule_count; i++) {
|
||||||
|
free(rules.rules[i].Tag);
|
||||||
|
free(rules.rules[i].Pattern);
|
||||||
|
}
|
||||||
|
free(rules.rules);
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < rules.mapping_count; i++) {
|
||||||
|
free(rules.mappings[i].Id);
|
||||||
|
free(rules.mappings[i].Tag);
|
||||||
|
}
|
||||||
|
free(rules.mappings);
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < rules.code_block_count; i++) {
|
||||||
|
if (rules.code_blocks[i].post_processor_code) free(rules.code_blocks[i].post_processor_code);
|
||||||
|
if (rules.code_blocks[i].variables) free(rules.code_blocks[i].variables);
|
||||||
|
}
|
||||||
|
free(rules.code_blocks);
|
||||||
|
|
||||||
|
if (!success) {
|
||||||
|
fprintf(stderr, "Error: Code generation failed\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Success: Generated lexer source code\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
+14
-1
@@ -1 +1,14 @@
|
|||||||
#include "../Headers/slex_core.h"
|
#include "../Headers/slex_core.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
// Dispatches code generation to the backend that matches
// options->target_language. Returns false when any argument is NULL or
// when the target language has no backend; otherwise returns whatever
// the selected backend returns.
bool slex_translate_to_file(slex_options *options, slex_rules *rules, FILE *output_file)
{
    if (options == NULL || rules == NULL || output_file == NULL) {
        return false;
    }

    switch (options->target_language) {
    case c_language:
        return slex_translate_to_file_c(options, rules, output_file);
    case csharp:
        return slex_translate_to_file_csharp(options, rules, output_file);
    default:
        return false;
    }
}
|
||||||
@@ -0,0 +1,249 @@
|
|||||||
|
#include "../Headers/slex_core.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
// Helper: strips whitespace from both ends of `str` in place.
// Returns a pointer to the first non-whitespace character inside `str`
// (or to its terminator when the string is all whitespace). Trailing
// whitespace is cut by writing a new terminator into the buffer.
static char* trim_whitespace(char* str) {
    char* first = str;
    for (; isspace((unsigned char)*first); first++) {
    }

    if (*first == '\0') {
        return first;
    }

    // `stop` is one past the last character that is kept.
    char* stop = first + strlen(first);
    while (stop - 1 > first && isspace((unsigned char)stop[-1])) {
        stop--;
    }
    *stop = '\0';

    return first;
}
|
||||||
|
|
||||||
|
// Helper: copies the next line of the buffer at *cursor into `line_buf`
// (at most max_len - 1 characters, always NUL-terminated) and advances
// *cursor past the copied text and one following "\r", "\n" or "\r\n".
// Returns false without touching `line_buf` when *cursor is already at
// the end of the buffer.
static bool get_next_line(char** cursor, char* line_buf, int max_len) {
    char* pos = *cursor;
    if (*pos == '\0') {
        return false;
    }

    int copied = 0;
    for (;;) {
        char ch = *pos;
        if (ch == '\0' || ch == '\n' || ch == '\r' || copied >= max_len - 1) {
            break;
        }
        line_buf[copied++] = ch;
        pos++;
    }
    line_buf[copied] = '\0';

    // Consume the line terminator, accepting CR, LF or CRLF.
    if (*pos == '\r') {
        pos++;
    }
    if (*pos == '\n') {
        pos++;
    }

    *cursor = pos;
    return true;
}
|
||||||
|
|
||||||
|
bool slex_read_rule_from_cstr(char *content, slex_rules *output_rule) {
|
||||||
|
if (!content || !output_rule) return false;
|
||||||
|
|
||||||
|
// Initialize output structure
|
||||||
|
output_rule->rules = NULL;
|
||||||
|
output_rule->rule_count = 0;
|
||||||
|
output_rule->mappings = NULL;
|
||||||
|
output_rule->mapping_count = 0;
|
||||||
|
output_rule->code_blocks = NULL;
|
||||||
|
output_rule->code_block_count = 0;
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
STATE_NONE,
|
||||||
|
STATE_RULE,
|
||||||
|
STATE_MAPPING,
|
||||||
|
STATE_CODE
|
||||||
|
} ParserState;
|
||||||
|
|
||||||
|
ParserState state = STATE_NONE;
|
||||||
|
slex_target_language current_lang = c_language;
|
||||||
|
bool has_lang = false;
|
||||||
|
|
||||||
|
char* cursor = content;
|
||||||
|
char line[4096];
|
||||||
|
|
||||||
|
while (get_next_line(&cursor, line, sizeof(line))) {
|
||||||
|
char* trimmed = trim_whitespace(line);
|
||||||
|
|
||||||
|
// Skip comments and empty lines
|
||||||
|
if (trimmed[0] == '\0' || trimmed[0] == '#' || (trimmed[0] == '/' && trimmed[1] == '/')) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section switches
|
||||||
|
if (strcmp(trimmed, "rule:") == 0) {
|
||||||
|
state = STATE_RULE;
|
||||||
|
continue;
|
||||||
|
} else if (strcmp(trimmed, "mapping:") == 0) {
|
||||||
|
state = STATE_MAPPING;
|
||||||
|
continue;
|
||||||
|
} else if (strcmp(trimmed, "code:") == 0) {
|
||||||
|
state = STATE_CODE;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state == STATE_RULE) {
|
||||||
|
// Split into <Tag> and <MatchingPattern>
|
||||||
|
// Tag is first space-delimited token
|
||||||
|
char* tag = trimmed;
|
||||||
|
char* pattern = trimmed;
|
||||||
|
while (*pattern != '\0' && !isspace((unsigned char)*pattern)) {
|
||||||
|
pattern++;
|
||||||
|
}
|
||||||
|
if (*pattern != '\0') {
|
||||||
|
*pattern = '\0';
|
||||||
|
pattern++;
|
||||||
|
pattern = trim_whitespace(pattern);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strlen(tag) > 0 && strlen(pattern) > 0) {
|
||||||
|
output_rule->rule_count++;
|
||||||
|
output_rule->rules = (slex_rule*)realloc(output_rule->rules, output_rule->rule_count * sizeof(slex_rule));
|
||||||
|
output_rule->rules[output_rule->rule_count - 1].Tag = strdup(tag);
|
||||||
|
output_rule->rules[output_rule->rule_count - 1].Pattern = strdup(pattern);
|
||||||
|
}
|
||||||
|
} else if (state == STATE_MAPPING) {
|
||||||
|
// Split into <Id> and <Tag>
|
||||||
|
char* id = trimmed;
|
||||||
|
char* tag = trimmed;
|
||||||
|
while (*tag != '\0' && !isspace((unsigned char)*tag)) {
|
||||||
|
tag++;
|
||||||
|
}
|
||||||
|
if (*tag != '\0') {
|
||||||
|
*tag = '\0';
|
||||||
|
tag++;
|
||||||
|
tag = trim_whitespace(tag);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strlen(id) > 0 && strlen(tag) > 0) {
|
||||||
|
output_rule->mapping_count++;
|
||||||
|
output_rule->mappings = (slex_mapping*)realloc(output_rule->mappings, output_rule->mapping_count * sizeof(slex_mapping));
|
||||||
|
output_rule->mappings[output_rule->mapping_count - 1].Id = strdup(id);
|
||||||
|
output_rule->mappings[output_rule->mapping_count - 1].Tag = strdup(tag);
|
||||||
|
}
|
||||||
|
} else if (state == STATE_CODE) {
|
||||||
|
int len = (int)strlen(trimmed);
|
||||||
|
if (trimmed[0] == '%' && trimmed[len - 1] == '%') {
|
||||||
|
// Language definition block like %c% or %c#% or %csharp%
|
||||||
|
char lang_name[256];
|
||||||
|
strncpy(lang_name, trimmed + 1, len - 2);
|
||||||
|
lang_name[len - 2] = '\0';
|
||||||
|
char* trimmed_lang = trim_whitespace(lang_name);
|
||||||
|
|
||||||
|
if (strcmp(trimmed_lang, "c") == 0) {
|
||||||
|
current_lang = c_language;
|
||||||
|
has_lang = true;
|
||||||
|
} else if (strcmp(trimmed_lang, "c#") == 0 || strcmp(trimmed_lang, "csharp") == 0) {
|
||||||
|
current_lang = csharp;
|
||||||
|
has_lang = true;
|
||||||
|
} else {
|
||||||
|
has_lang = false;
|
||||||
|
}
|
||||||
|
} else if (has_lang && strcmp(trimmed, "%post_processor") == 0) {
|
||||||
|
// Read all lines until "post_processor%"
|
||||||
|
int cap = 4096;
|
||||||
|
char* code = (char*)malloc(cap);
|
||||||
|
code[0] = '\0';
|
||||||
|
int code_len = 0;
|
||||||
|
|
||||||
|
char code_line[4096];
|
||||||
|
while (get_next_line(&cursor, code_line, sizeof(code_line))) {
|
||||||
|
char* trimmed_code = trim_whitespace(code_line);
|
||||||
|
if (strcmp(trimmed_code, "post_processor%") == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
int line_len = (int)strlen(code_line);
|
||||||
|
if (code_len + line_len + 2 >= cap) {
|
||||||
|
cap *= 2;
|
||||||
|
code = (char*)realloc(code, cap);
|
||||||
|
}
|
||||||
|
strcat(code, code_line);
|
||||||
|
strcat(code, "\n");
|
||||||
|
code_len += line_len + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add or update code block
|
||||||
|
int block_idx = -1;
|
||||||
|
for (uint64_t i = 0; i < output_rule->code_block_count; i++) {
|
||||||
|
if (output_rule->code_blocks[i].target_languge == current_lang) {
|
||||||
|
block_idx = (int)i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block_idx == -1) {
|
||||||
|
output_rule->code_block_count++;
|
||||||
|
output_rule->code_blocks = (code_block*)realloc(output_rule->code_blocks, output_rule->code_block_count * sizeof(code_block));
|
||||||
|
block_idx = (int)output_rule->code_block_count - 1;
|
||||||
|
output_rule->code_blocks[block_idx].target_languge = current_lang;
|
||||||
|
output_rule->code_blocks[block_idx].post_processor_code = NULL;
|
||||||
|
output_rule->code_blocks[block_idx].variables = NULL;
|
||||||
|
}
|
||||||
|
output_rule->code_blocks[block_idx].post_processor_code = code;
|
||||||
|
} else if (has_lang && strcmp(trimmed, "%variables") == 0) {
|
||||||
|
// Read all lines until "variables%"
|
||||||
|
int cap = 4096;
|
||||||
|
char* vars = (char*)malloc(cap);
|
||||||
|
vars[0] = '\0';
|
||||||
|
int vars_len = 0;
|
||||||
|
|
||||||
|
char vars_line[4096];
|
||||||
|
while (get_next_line(&cursor, vars_line, sizeof(vars_line))) {
|
||||||
|
char* trimmed_vars = trim_whitespace(vars_line);
|
||||||
|
if (strcmp(trimmed_vars, "variables%") == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
int line_len = (int)strlen(vars_line);
|
||||||
|
if (vars_len + line_len + 2 >= cap) {
|
||||||
|
cap *= 2;
|
||||||
|
vars = (char*)realloc(vars, cap);
|
||||||
|
}
|
||||||
|
strcat(vars, vars_line);
|
||||||
|
strcat(vars, "\n");
|
||||||
|
vars_len += line_len + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add or update code block
|
||||||
|
int block_idx = -1;
|
||||||
|
for (uint64_t i = 0; i < output_rule->code_block_count; i++) {
|
||||||
|
if (output_rule->code_blocks[i].target_languge == current_lang) {
|
||||||
|
block_idx = (int)i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block_idx == -1) {
|
||||||
|
output_rule->code_block_count++;
|
||||||
|
output_rule->code_blocks = (code_block*)realloc(output_rule->code_blocks, output_rule->code_block_count * sizeof(code_block));
|
||||||
|
block_idx = (int)output_rule->code_block_count - 1;
|
||||||
|
output_rule->code_blocks[block_idx].target_languge = current_lang;
|
||||||
|
output_rule->code_blocks[block_idx].post_processor_code = NULL;
|
||||||
|
output_rule->code_blocks[block_idx].variables = NULL;
|
||||||
|
}
|
||||||
|
output_rule->code_blocks[block_idx].variables = vars;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads the whole stream `f` (from its beginning) into memory and parses
// it with slex_read_rule_from_cstr.
//
// Returns false when an argument is NULL, when the stream cannot be
// measured or rewound (fseek/ftell failure, e.g. a non-seekable stream),
// when memory runs out, when reading fails, or when parsing fails. The
// stream is not closed; that stays the caller's job.
bool slex_read_rule_from_file(FILE *f, slex_rules *output_rule) {
    if (!f || !output_rule) return false;

    // Determine file size; ftell reports failure as -1, which must not
    // reach the allocation size below.
    if (fseek(f, 0, SEEK_END) != 0) return false;
    long size = ftell(f);
    if (size < 0) return false;
    if (fseek(f, 0, SEEK_SET) != 0) return false;

    char* content = malloc((size_t)size + 1);
    if (!content) return false;

    // Fewer bytes than `size` may arrive (e.g. newline translation in
    // text mode); that is fine as long as no read error occurred.
    size_t read_bytes = fread(content, 1, (size_t)size, f);
    if (ferror(f)) {
        free(content);
        return false;
    }
    content[read_bytes] = '\0';

    bool success = slex_read_rule_from_cstr(content, output_rule);
    free(content);
    return success;
}
|
||||||
@@ -0,0 +1,702 @@
|
|||||||
|
#include "../Headers/slex_regex.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
// Token kinds produced while parsing a regular expression.
typedef enum {
    TOKEN_CHAR = 0,     // operand: char_set marks the accepted character
    TOKEN_CHAR_SET = 1, // operand: char_set marks every accepted character
    TOKEN_CONCAT = 2,   // binary operator: concatenation
    TOKEN_ALT = 3,      // binary operator: '|'
    TOKEN_STAR = 4,     // postfix operator: '*'
    TOKEN_PLUS = 5,     // postfix operator: '+'
    TOKEN_QUESTION = 6, // postfix operator: '?'
    TOKEN_LPAREN = 7,   // '('
    TOKEN_RPAREN = 8,   // ')'
} RegexTokenType;

// One regex token: its kind plus, for operand tokens, a membership table
// indexed by unsigned character value.
typedef struct {
    RegexTokenType type;
    bool char_set[256];
} RegexToken;
|
||||||
|
|
||||||
|
// Global array to track all allocated NFA states for easy deallocation
|
||||||
|
static NFAState** g_nfa_states = NULL;
|
||||||
|
static int g_nfa_state_count = 0;
|
||||||
|
static int g_nfa_state_capacity = 0;
|
||||||
|
|
||||||
|
static NFAState* create_nfa_state() {
|
||||||
|
NFAState* s = (NFAState*)malloc(sizeof(NFAState));
|
||||||
|
s->id = g_nfa_state_count;
|
||||||
|
s->is_epsilon = false;
|
||||||
|
memset(s->char_set, 0, sizeof(s->char_set));
|
||||||
|
s->edge1 = NULL;
|
||||||
|
s->edge2 = NULL;
|
||||||
|
s->accept_rule_index = -1;
|
||||||
|
|
||||||
|
// Track state globally
|
||||||
|
if (g_nfa_state_count >= g_nfa_state_capacity) {
|
||||||
|
g_nfa_state_capacity = g_nfa_state_capacity == 0 ? 1024 : g_nfa_state_capacity * 2;
|
||||||
|
g_nfa_states = (NFAState**)realloc(g_nfa_states, g_nfa_state_capacity * sizeof(NFAState*));
|
||||||
|
}
|
||||||
|
g_nfa_states[g_nfa_state_count++] = s;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void free_all_nfa_states() {
|
||||||
|
for (int i = 0; i < g_nfa_state_count; i++) {
|
||||||
|
free(g_nfa_states[i]);
|
||||||
|
}
|
||||||
|
free(g_nfa_states);
|
||||||
|
g_nfa_states = NULL;
|
||||||
|
g_nfa_state_count = 0;
|
||||||
|
g_nfa_state_capacity = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tokenize a regex pattern
|
||||||
|
static RegexToken* tokenize_regex(const char* pattern, int* token_count_out) {
|
||||||
|
int capacity = 128;
|
||||||
|
int count = 0;
|
||||||
|
RegexToken* tokens = (RegexToken*)malloc(capacity * sizeof(RegexToken));
|
||||||
|
int len = (int)strlen(pattern);
|
||||||
|
int idx = 0;
|
||||||
|
|
||||||
|
while (idx < len) {
|
||||||
|
if (count >= capacity) {
|
||||||
|
capacity *= 2;
|
||||||
|
tokens = (RegexToken*)realloc(tokens, capacity * sizeof(RegexToken));
|
||||||
|
}
|
||||||
|
|
||||||
|
char c = pattern[idx];
|
||||||
|
|
||||||
|
if (c == '\\') {
|
||||||
|
idx++;
|
||||||
|
if (idx >= len) {
|
||||||
|
// Trailing backslash, treat as literal backslash
|
||||||
|
tokens[count].type = TOKEN_CHAR;
|
||||||
|
memset(tokens[count].char_set, 0, 256);
|
||||||
|
tokens[count].char_set[(unsigned char)'\\'] = true;
|
||||||
|
count++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
char esc = pattern[idx++];
|
||||||
|
tokens[count].type = TOKEN_CHAR_SET;
|
||||||
|
memset(tokens[count].char_set, 0, 256);
|
||||||
|
|
||||||
|
if (esc == 'p' && idx < len && pattern[idx] == '{') {
|
||||||
|
idx++; // skip '{'
|
||||||
|
char prop[256];
|
||||||
|
int p_idx = 0;
|
||||||
|
while (idx < len && pattern[idx] != '}') {
|
||||||
|
prop[p_idx++] = pattern[idx++];
|
||||||
|
}
|
||||||
|
prop[p_idx] = '\0';
|
||||||
|
if (idx < len && pattern[idx] == '}') {
|
||||||
|
idx++; // skip '}'
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strcmp(prop, "P") == 0) {
|
||||||
|
const char* punct = "!\"#%&'()*,-./:;?@[\\]_{}";
|
||||||
|
for (int k = 0; punct[k] != '\0'; k++) {
|
||||||
|
tokens[count].char_set[(unsigned char)punct[k]] = true;
|
||||||
|
}
|
||||||
|
} else if (strcmp(prop, "S") == 0) {
|
||||||
|
const char* sym = "$+<=>^`|~";
|
||||||
|
for (int k = 0; sym[k] != '\0'; k++) {
|
||||||
|
tokens[count].char_set[(unsigned char)sym[k]] = true;
|
||||||
|
}
|
||||||
|
} else if (strcmp(prop, "L") == 0) {
|
||||||
|
for (int d = 'a'; d <= 'z'; d++) tokens[count].char_set[d] = true;
|
||||||
|
for (int d = 'A'; d <= 'Z'; d++) tokens[count].char_set[d] = true;
|
||||||
|
} else if (strcmp(prop, "N") == 0) {
|
||||||
|
for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true;
|
||||||
|
}
|
||||||
|
} else if (esc == 'n') {
|
||||||
|
tokens[count].char_set[10] = true; // LF
|
||||||
|
} else if (esc == 't') {
|
||||||
|
tokens[count].char_set[9] = true; // TAB
|
||||||
|
} else if (esc == 'r') {
|
||||||
|
tokens[count].char_set[13] = true; // CR
|
||||||
|
} else if (esc == 's') {
|
||||||
|
tokens[count].char_set[32] = true; // Space
|
||||||
|
tokens[count].char_set[9] = true; // TAB
|
||||||
|
tokens[count].char_set[13] = true; // CR
|
||||||
|
tokens[count].char_set[10] = true; // LF
|
||||||
|
} else if (esc == 'd') {
|
||||||
|
for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true;
|
||||||
|
} else if (esc == 'w') {
|
||||||
|
for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true;
|
||||||
|
for (int d = 'a'; d <= 'z'; d++) tokens[count].char_set[d] = true;
|
||||||
|
for (int d = 'A'; d <= 'Z'; d++) tokens[count].char_set[d] = true;
|
||||||
|
tokens[count].char_set[(unsigned char)'_'] = true;
|
||||||
|
} else {
|
||||||
|
// Literal escaped character
|
||||||
|
tokens[count].type = TOKEN_CHAR;
|
||||||
|
tokens[count].char_set[(unsigned char)esc] = true;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
} else if (c == '[') {
|
||||||
|
idx++;
|
||||||
|
bool negate = false;
|
||||||
|
if (idx < len && pattern[idx] == '^') {
|
||||||
|
negate = true;
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens[count].type = TOKEN_CHAR_SET;
|
||||||
|
memset(tokens[count].char_set, 0, 256);
|
||||||
|
|
||||||
|
while (idx < len && pattern[idx] != ']') {
|
||||||
|
char c1 = pattern[idx++];
|
||||||
|
if (c1 == '\\' && idx < len) {
|
||||||
|
char esc = pattern[idx++];
|
||||||
|
if (esc == 'p' && idx < len && pattern[idx] == '{') {
|
||||||
|
idx++; // skip '{'
|
||||||
|
char prop[256];
|
||||||
|
int p_idx = 0;
|
||||||
|
while (idx < len && pattern[idx] != '}') {
|
||||||
|
prop[p_idx++] = pattern[idx++];
|
||||||
|
}
|
||||||
|
prop[p_idx] = '\0';
|
||||||
|
if (idx < len && pattern[idx] == '}') {
|
||||||
|
idx++; // skip '}'
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strcmp(prop, "P") == 0) {
|
||||||
|
const char* punct = "!\"#%&'()*,-./:;?@[\\]_{}";
|
||||||
|
for (int k = 0; punct[k] != '\0'; k++) {
|
||||||
|
tokens[count].char_set[(unsigned char)punct[k]] = true;
|
||||||
|
}
|
||||||
|
} else if (strcmp(prop, "S") == 0) {
|
||||||
|
const char* sym = "$+<=>^`|~";
|
||||||
|
for (int k = 0; sym[k] != '\0'; k++) {
|
||||||
|
tokens[count].char_set[(unsigned char)sym[k]] = true;
|
||||||
|
}
|
||||||
|
} else if (strcmp(prop, "L") == 0) {
|
||||||
|
for (int d = 'a'; d <= 'z'; d++) tokens[count].char_set[d] = true;
|
||||||
|
for (int d = 'A'; d <= 'Z'; d++) tokens[count].char_set[d] = true;
|
||||||
|
} else if (strcmp(prop, "N") == 0) {
|
||||||
|
for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
} else if (esc == 'n') c1 = '\n';
|
||||||
|
else if (esc == 't') c1 = '\t';
|
||||||
|
else if (esc == 'r') c1 = '\r';
|
||||||
|
else if (esc == 's') {
|
||||||
|
tokens[count].char_set[32] = true;
|
||||||
|
tokens[count].char_set[9] = true;
|
||||||
|
tokens[count].char_set[13] = true;
|
||||||
|
tokens[count].char_set[10] = true;
|
||||||
|
continue;
|
||||||
|
} else if (esc == 'd') {
|
||||||
|
for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true;
|
||||||
|
continue;
|
||||||
|
} else if (esc == 'w') {
|
||||||
|
for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true;
|
||||||
|
for (int d = 'a'; d <= 'z'; d++) tokens[count].char_set[d] = true;
|
||||||
|
for (int d = 'A'; d <= 'Z'; d++) tokens[count].char_set[d] = true;
|
||||||
|
tokens[count].char_set[(unsigned char)'_'] = true;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
c1 = esc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check range: c1-c2
|
||||||
|
if (idx + 1 < len && pattern[idx] == '-' && pattern[idx + 1] != ']') {
|
||||||
|
idx++; // skip '-'
|
||||||
|
char c2 = pattern[idx++];
|
||||||
|
if (c2 == '\\' && idx < len) {
|
||||||
|
char esc = pattern[idx++];
|
||||||
|
if (esc == 'n') c2 = '\n';
|
||||||
|
else if (esc == 't') c2 = '\t';
|
||||||
|
else if (esc == 'r') c2 = '\r';
|
||||||
|
else c2 = esc;
|
||||||
|
}
|
||||||
|
for (int r = (unsigned char)c1; r <= (unsigned char)c2; r++) {
|
||||||
|
tokens[count].char_set[r] = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tokens[count].char_set[(unsigned char)c1] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (idx < len && pattern[idx] == ']') {
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (negate) {
|
||||||
|
for (int i = 0; i < 256; i++) {
|
||||||
|
tokens[count].char_set[i] = !tokens[count].char_set[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
} else if (c == '.') {
|
||||||
|
tokens[count].type = TOKEN_CHAR_SET;
|
||||||
|
memset(tokens[count].char_set, 0, 256);
|
||||||
|
for (int i = 0; i < 256; i++) {
|
||||||
|
if (i != 10) { // any character except newline
|
||||||
|
tokens[count].char_set[i] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
idx++;
|
||||||
|
} else if (c == '*') {
|
||||||
|
tokens[count].type = TOKEN_STAR;
|
||||||
|
count++;
|
||||||
|
idx++;
|
||||||
|
} else if (c == '+') {
|
||||||
|
tokens[count].type = TOKEN_PLUS;
|
||||||
|
count++;
|
||||||
|
idx++;
|
||||||
|
} else if (c == '?') {
|
||||||
|
tokens[count].type = TOKEN_QUESTION;
|
||||||
|
count++;
|
||||||
|
idx++;
|
||||||
|
} else if (c == '|') {
|
||||||
|
tokens[count].type = TOKEN_ALT;
|
||||||
|
count++;
|
||||||
|
idx++;
|
||||||
|
} else if (c == '(') {
|
||||||
|
tokens[count].type = TOKEN_LPAREN;
|
||||||
|
count++;
|
||||||
|
idx++;
|
||||||
|
} else if (c == ')') {
|
||||||
|
tokens[count].type = TOKEN_RPAREN;
|
||||||
|
count++;
|
||||||
|
idx++;
|
||||||
|
} else {
|
||||||
|
tokens[count].type = TOKEN_CHAR;
|
||||||
|
memset(tokens[count].char_set, 0, 256);
|
||||||
|
tokens[count].char_set[(unsigned char)c] = true;
|
||||||
|
count++;
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*token_count_out = count;
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert explicit concatenation operators
|
||||||
|
static RegexToken* insert_concat(RegexToken* input, int input_count, int* output_count_out) {
|
||||||
|
int capacity = input_count * 2;
|
||||||
|
int count = 0;
|
||||||
|
RegexToken* output = (RegexToken*)malloc(capacity * sizeof(RegexToken));
|
||||||
|
|
||||||
|
for (int i = 0; i < input_count; i++) {
|
||||||
|
if (count >= capacity) {
|
||||||
|
capacity *= 2;
|
||||||
|
output = (RegexToken*)realloc(output, capacity * sizeof(RegexToken));
|
||||||
|
}
|
||||||
|
|
||||||
|
output[count++] = input[i];
|
||||||
|
|
||||||
|
if (i + 1 < input_count) {
|
||||||
|
RegexTokenType t1 = input[i].type;
|
||||||
|
RegexTokenType t2 = input[i + 1].type;
|
||||||
|
|
||||||
|
bool t1_can_concat = (t1 == TOKEN_CHAR || t1 == TOKEN_CHAR_SET || t1 == TOKEN_STAR || t1 == TOKEN_PLUS || t1 == TOKEN_QUESTION || t1 == TOKEN_RPAREN);
|
||||||
|
bool t2_can_concat = (t2 == TOKEN_CHAR || t2 == TOKEN_CHAR_SET || t2 == TOKEN_LPAREN);
|
||||||
|
|
||||||
|
if (t1_can_concat && t2_can_concat) {
|
||||||
|
if (count >= capacity) {
|
||||||
|
capacity *= 2;
|
||||||
|
output = (RegexToken*)realloc(output, capacity * sizeof(RegexToken));
|
||||||
|
}
|
||||||
|
output[count].type = TOKEN_CONCAT;
|
||||||
|
memset(output[count].char_set, 0, 256);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*output_count_out = count;
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shunting-yard algorithm to convert infix tokens to postfix tokens
|
||||||
|
static RegexToken* infix_to_postfix(RegexToken* infix, int infix_count, int* postfix_count_out) {
|
||||||
|
int capacity = infix_count;
|
||||||
|
int postfix_count = 0;
|
||||||
|
RegexToken* postfix = (RegexToken*)malloc(capacity * sizeof(RegexToken));
|
||||||
|
|
||||||
|
RegexToken stack[512];
|
||||||
|
int stack_top = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < infix_count; i++) {
|
||||||
|
RegexToken t = infix[i];
|
||||||
|
|
||||||
|
if (t.type == TOKEN_CHAR || t.type == TOKEN_CHAR_SET) {
|
||||||
|
if (postfix_count >= capacity) {
|
||||||
|
capacity *= 2;
|
||||||
|
postfix = (RegexToken*)realloc(postfix, capacity * sizeof(RegexToken));
|
||||||
|
}
|
||||||
|
postfix[postfix_count++] = t;
|
||||||
|
} else if (t.type == TOKEN_LPAREN) {
|
||||||
|
stack[stack_top++] = t;
|
||||||
|
} else if (t.type == TOKEN_RPAREN) {
|
||||||
|
while (stack_top > 0 && stack[stack_top - 1].type != TOKEN_LPAREN) {
|
||||||
|
if (postfix_count >= capacity) {
|
||||||
|
capacity *= 2;
|
||||||
|
postfix = (RegexToken*)realloc(postfix, capacity * sizeof(RegexToken));
|
||||||
|
}
|
||||||
|
postfix[postfix_count++] = stack[--stack_top];
|
||||||
|
}
|
||||||
|
if (stack_top > 0) {
|
||||||
|
stack_top--; // pop LPAREN
|
||||||
|
}
|
||||||
|
} else if (t.type == TOKEN_STAR || t.type == TOKEN_PLUS || t.type == TOKEN_QUESTION) {
|
||||||
|
// Unary operators have highest precedence and are postfix, output immediately
|
||||||
|
if (postfix_count >= capacity) {
|
||||||
|
capacity *= 2;
|
||||||
|
postfix = (RegexToken*)realloc(postfix, capacity * sizeof(RegexToken));
|
||||||
|
}
|
||||||
|
postfix[postfix_count++] = t;
|
||||||
|
} else {
|
||||||
|
// Binary operators (CONCAT, ALT)
|
||||||
|
int p_curr = (t.type == TOKEN_ALT) ? 1 : 2;
|
||||||
|
while (stack_top > 0) {
|
||||||
|
RegexTokenType top_type = stack[stack_top - 1].type;
|
||||||
|
if (top_type == TOKEN_CONCAT || top_type == TOKEN_ALT) {
|
||||||
|
int p_top = (top_type == TOKEN_ALT) ? 1 : 2;
|
||||||
|
if (p_top >= p_curr) {
|
||||||
|
if (postfix_count >= capacity) {
|
||||||
|
capacity *= 2;
|
||||||
|
postfix = (RegexToken*)realloc(postfix, capacity * sizeof(RegexToken));
|
||||||
|
}
|
||||||
|
postfix[postfix_count++] = stack[--stack_top];
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stack[stack_top++] = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (stack_top > 0) {
|
||||||
|
if (postfix_count >= capacity) {
|
||||||
|
capacity *= 2;
|
||||||
|
postfix = (RegexToken*)realloc(postfix, capacity * sizeof(RegexToken));
|
||||||
|
}
|
||||||
|
postfix[postfix_count++] = stack[--stack_top];
|
||||||
|
}
|
||||||
|
|
||||||
|
*postfix_count_out = postfix_count;
|
||||||
|
return postfix;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build NFA from postfix tokens using Thompson's construction
|
||||||
|
static NFAFragment build_nfa(RegexToken* postfix, int postfix_count) {
|
||||||
|
NFAFragment stack[512];
|
||||||
|
int stack_top = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < postfix_count; i++) {
|
||||||
|
RegexToken t = postfix[i];
|
||||||
|
|
||||||
|
if (t.type == TOKEN_CHAR || t.type == TOKEN_CHAR_SET) {
|
||||||
|
NFAState* start = create_nfa_state();
|
||||||
|
NFAState* accept = create_nfa_state();
|
||||||
|
start->is_epsilon = false;
|
||||||
|
memcpy(start->char_set, t.char_set, 256);
|
||||||
|
start->edge1 = accept;
|
||||||
|
|
||||||
|
NFAFragment frag = {start, accept};
|
||||||
|
stack[stack_top++] = frag;
|
||||||
|
} else if (t.type == TOKEN_CONCAT) {
|
||||||
|
NFAFragment f2 = stack[--stack_top];
|
||||||
|
NFAFragment f1 = stack[--stack_top];
|
||||||
|
|
||||||
|
f1.accept->is_epsilon = true;
|
||||||
|
f1.accept->edge1 = f2.start;
|
||||||
|
|
||||||
|
NFAFragment frag = {f1.start, f2.accept};
|
||||||
|
stack[stack_top++] = frag;
|
||||||
|
} else if (t.type == TOKEN_ALT) {
|
||||||
|
NFAFragment f2 = stack[--stack_top];
|
||||||
|
NFAFragment f1 = stack[--stack_top];
|
||||||
|
|
||||||
|
NFAState* start = create_nfa_state();
|
||||||
|
NFAState* accept = create_nfa_state();
|
||||||
|
|
||||||
|
start->is_epsilon = true;
|
||||||
|
start->edge1 = f1.start;
|
||||||
|
start->edge2 = f2.start;
|
||||||
|
|
||||||
|
f1.accept->is_epsilon = true;
|
||||||
|
f1.accept->edge1 = accept;
|
||||||
|
|
||||||
|
f2.accept->is_epsilon = true;
|
||||||
|
f2.accept->edge1 = accept;
|
||||||
|
|
||||||
|
NFAFragment frag = {start, accept};
|
||||||
|
stack[stack_top++] = frag;
|
||||||
|
} else if (t.type == TOKEN_STAR) {
|
||||||
|
NFAFragment f1 = stack[--stack_top];
|
||||||
|
|
||||||
|
NFAState* start = create_nfa_state();
|
||||||
|
NFAState* accept = create_nfa_state();
|
||||||
|
|
||||||
|
start->is_epsilon = true;
|
||||||
|
start->edge1 = f1.start;
|
||||||
|
start->edge2 = accept;
|
||||||
|
|
||||||
|
f1.accept->is_epsilon = true;
|
||||||
|
f1.accept->edge1 = f1.start;
|
||||||
|
f1.accept->edge2 = accept;
|
||||||
|
|
||||||
|
NFAFragment frag = {start, accept};
|
||||||
|
stack[stack_top++] = frag;
|
||||||
|
} else if (t.type == TOKEN_PLUS) {
|
||||||
|
NFAFragment f1 = stack[--stack_top];
|
||||||
|
|
||||||
|
NFAState* start = create_nfa_state();
|
||||||
|
NFAState* accept = create_nfa_state();
|
||||||
|
|
||||||
|
start->is_epsilon = true;
|
||||||
|
start->edge1 = f1.start;
|
||||||
|
|
||||||
|
f1.accept->is_epsilon = true;
|
||||||
|
f1.accept->edge1 = f1.start;
|
||||||
|
f1.accept->edge2 = accept;
|
||||||
|
|
||||||
|
NFAFragment frag = {start, accept};
|
||||||
|
stack[stack_top++] = frag;
|
||||||
|
} else if (t.type == TOKEN_QUESTION) {
|
||||||
|
NFAFragment f1 = stack[--stack_top];
|
||||||
|
|
||||||
|
NFAState* start = create_nfa_state();
|
||||||
|
NFAState* accept = create_nfa_state();
|
||||||
|
|
||||||
|
start->is_epsilon = true;
|
||||||
|
start->edge1 = f1.start;
|
||||||
|
start->edge2 = accept;
|
||||||
|
|
||||||
|
f1.accept->is_epsilon = true;
|
||||||
|
f1.accept->edge1 = accept;
|
||||||
|
|
||||||
|
NFAFragment frag = {start, accept};
|
||||||
|
stack[stack_top++] = frag;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return stack[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes epsilon closure of a set of NFA states
|
||||||
|
static void get_epsilon_closure(int* input_states, int input_count, NFAState** all_nfa_states, int total_nfa_states, int** output_states, int* output_count) {
|
||||||
|
bool* visited = (bool*)calloc(total_nfa_states, sizeof(bool));
|
||||||
|
int* queue = (int*)malloc(total_nfa_states * sizeof(int));
|
||||||
|
int head = 0, tail = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < input_count; i++) {
|
||||||
|
int id = input_states[i];
|
||||||
|
visited[id] = true;
|
||||||
|
queue[tail++] = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (head < tail) {
|
||||||
|
int curr_id = queue[head++];
|
||||||
|
NFAState* s = all_nfa_states[curr_id];
|
||||||
|
if (s->is_epsilon) {
|
||||||
|
if (s->edge1 && !visited[s->edge1->id]) {
|
||||||
|
visited[s->edge1->id] = true;
|
||||||
|
queue[tail++] = s->edge1->id;
|
||||||
|
}
|
||||||
|
if (s->edge2 && !visited[s->edge2->id]) {
|
||||||
|
visited[s->edge2->id] = true;
|
||||||
|
queue[tail++] = s->edge2->id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int count = 0;
|
||||||
|
for (int i = 0; i < total_nfa_states; i++) {
|
||||||
|
if (visited[i]) count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
int* res = (int*)malloc(count * sizeof(int));
|
||||||
|
int idx = 0;
|
||||||
|
for (int i = 0; i < total_nfa_states; i++) {
|
||||||
|
if (visited[i]) {
|
||||||
|
res[idx++] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*output_states = res;
|
||||||
|
*output_count = count;
|
||||||
|
free(visited);
|
||||||
|
free(queue);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true when the two NFA state-id arrays have the same length and hold
// the same id at every position (element-wise comparison, order-sensitive).
static bool are_nfa_sets_equal(int* a, int a_count, int* b, int b_count) {
    bool same = (a_count == b_count);

    // Stop at the first mismatch; skipped entirely when the lengths differ.
    for (int k = 0; same && k < a_count; k++) {
        same = (a[k] == b[k]);
    }

    return same;
}
|
||||||
|
|
||||||
|
// Compiles a set of regular expression patterns into a complete DFA using subset construction
|
||||||
|
DFAState* slex_compile_regexes(char** patterns, int pattern_count, int* dfa_state_count_out) {
|
||||||
|
free_all_nfa_states(); // Reset global state tracker
|
||||||
|
|
||||||
|
// 1. Build NFA for each pattern
|
||||||
|
NFAFragment* fragments = (NFAFragment*)malloc(pattern_count * sizeof(NFAFragment));
|
||||||
|
for (int i = 0; i < pattern_count; i++) {
|
||||||
|
int t_count = 0, concat_count = 0, post_count = 0;
|
||||||
|
RegexToken* tokens = tokenize_regex(patterns[i], &t_count);
|
||||||
|
RegexToken* tokens_concat = insert_concat(tokens, t_count, &concat_count);
|
||||||
|
RegexToken* tokens_postfix = infix_to_postfix(tokens_concat, concat_count, &post_count);
|
||||||
|
|
||||||
|
fragments[i] = build_nfa(tokens_postfix, post_count);
|
||||||
|
fragments[i].accept->accept_rule_index = i;
|
||||||
|
|
||||||
|
free(tokens);
|
||||||
|
free(tokens_concat);
|
||||||
|
free(tokens_postfix);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Create global start state with epsilon transitions to each pattern NFA's start state
|
||||||
|
NFAState* global_start = create_nfa_state();
|
||||||
|
global_start->is_epsilon = true;
|
||||||
|
|
||||||
|
NFAState* current_hub = global_start;
|
||||||
|
for (int i = 0; i < pattern_count; i++) {
|
||||||
|
if (i == pattern_count - 1) {
|
||||||
|
current_hub->edge1 = fragments[i].start;
|
||||||
|
} else {
|
||||||
|
NFAState* next_hub = create_nfa_state();
|
||||||
|
next_hub->is_epsilon = true;
|
||||||
|
current_hub->edge1 = fragments[i].start;
|
||||||
|
current_hub->edge2 = next_hub;
|
||||||
|
current_hub = next_hub;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(fragments);
|
||||||
|
|
||||||
|
// 3. Subset construction
|
||||||
|
int total_nfa_states = g_nfa_state_count;
|
||||||
|
NFAState** all_nfa_states = g_nfa_states;
|
||||||
|
|
||||||
|
int dfa_capacity = 1024;
|
||||||
|
int dfa_count = 0;
|
||||||
|
DFAState* dfa_states = (DFAState*)malloc(dfa_capacity * sizeof(DFAState));
|
||||||
|
|
||||||
|
// Queue for subset construction
|
||||||
|
int* work_queue = (int*)malloc(dfa_capacity * sizeof(int));
|
||||||
|
int queue_head = 0, queue_tail = 0;
|
||||||
|
|
||||||
|
// Start state epsilon closure
|
||||||
|
int start_nfa_id = global_start->id;
|
||||||
|
int* start_closure = NULL;
|
||||||
|
int start_closure_count = 0;
|
||||||
|
get_epsilon_closure(&start_nfa_id, 1, all_nfa_states, total_nfa_states, &start_closure, &start_closure_count);
|
||||||
|
|
||||||
|
// Create start DFA state (0)
|
||||||
|
dfa_states[dfa_count].id = dfa_count;
|
||||||
|
dfa_states[dfa_count].nfa_states = start_closure;
|
||||||
|
dfa_states[dfa_count].nfa_state_count = start_closure_count;
|
||||||
|
memset(dfa_states[dfa_count].transitions, -1, sizeof(dfa_states[dfa_count].transitions));
|
||||||
|
dfa_states[dfa_count].accept_rule_index = -1;
|
||||||
|
|
||||||
|
work_queue[queue_tail++] = dfa_count;
|
||||||
|
dfa_count++;
|
||||||
|
|
||||||
|
// Process queue
|
||||||
|
while (queue_head < queue_tail) {
|
||||||
|
int curr_dfa_id = work_queue[queue_head++];
|
||||||
|
|
||||||
|
// For each possible ASCII character transition
|
||||||
|
for (int c = 0; c < 256; c++) {
|
||||||
|
// Find NFA states reachable on character 'c'
|
||||||
|
int* reachable = (int*)malloc(total_nfa_states * sizeof(int));
|
||||||
|
int reachable_count = 0;
|
||||||
|
|
||||||
|
DFAState* curr_dfa = &dfa_states[curr_dfa_id];
|
||||||
|
for (int i = 0; i < curr_dfa->nfa_state_count; i++) {
|
||||||
|
NFAState* nfa_s = all_nfa_states[curr_dfa->nfa_states[i]];
|
||||||
|
if (!nfa_s->is_epsilon && nfa_s->char_set[c]) {
|
||||||
|
if (nfa_s->edge1) {
|
||||||
|
reachable[reachable_count++] = nfa_s->edge1->id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (reachable_count > 0) {
|
||||||
|
// Compute epsilon closure of reachable NFA states
|
||||||
|
int* closure = NULL;
|
||||||
|
int closure_count = 0;
|
||||||
|
get_epsilon_closure(reachable, reachable_count, all_nfa_states, total_nfa_states, &closure, &closure_count);
|
||||||
|
free(reachable);
|
||||||
|
|
||||||
|
// Check if this DFA state already exists
|
||||||
|
int existing_id = -1;
|
||||||
|
for (int d = 0; d < dfa_count; d++) {
|
||||||
|
if (are_nfa_sets_equal(dfa_states[d].nfa_states, dfa_states[d].nfa_state_count, closure, closure_count)) {
|
||||||
|
existing_id = d;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (existing_id != -1) {
|
||||||
|
dfa_states[curr_dfa_id].transitions[c] = existing_id;
|
||||||
|
free(closure);
|
||||||
|
} else {
|
||||||
|
if (dfa_count >= dfa_capacity) {
|
||||||
|
dfa_capacity *= 2;
|
||||||
|
dfa_states = (DFAState*)realloc(dfa_states, dfa_capacity * sizeof(DFAState));
|
||||||
|
work_queue = (int*)realloc(work_queue, dfa_capacity * sizeof(int));
|
||||||
|
}
|
||||||
|
|
||||||
|
dfa_states[dfa_count].id = dfa_count;
|
||||||
|
dfa_states[dfa_count].nfa_states = closure;
|
||||||
|
dfa_states[dfa_count].nfa_state_count = closure_count;
|
||||||
|
memset(dfa_states[dfa_count].transitions, -1, sizeof(dfa_states[dfa_count].transitions));
|
||||||
|
dfa_states[dfa_count].accept_rule_index = -1;
|
||||||
|
|
||||||
|
dfa_states[curr_dfa_id].transitions[c] = dfa_count;
|
||||||
|
work_queue[queue_tail++] = dfa_count;
|
||||||
|
dfa_count++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
free(reachable);
|
||||||
|
dfa_states[curr_dfa_id].transitions[c] = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine accepting status of each DFA state based on NFA accept states
|
||||||
|
for (int d = 0; d < dfa_count; d++) {
|
||||||
|
int best_rule = -1;
|
||||||
|
for (int i = 0; i < dfa_states[d].nfa_state_count; i++) {
|
||||||
|
NFAState* nfa_s = all_nfa_states[dfa_states[d].nfa_states[i]];
|
||||||
|
if (nfa_s->accept_rule_index != -1) {
|
||||||
|
if (best_rule == -1 || nfa_s->accept_rule_index < best_rule) {
|
||||||
|
best_rule = nfa_s->accept_rule_index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dfa_states[d].accept_rule_index = best_rule;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(work_queue);
|
||||||
|
free_all_nfa_states(); // We no longer need the NFA states
|
||||||
|
|
||||||
|
*dfa_state_count_out = dfa_count;
|
||||||
|
return dfa_states;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Frees a DFA array: first the NFA-state id list owned by each of the
// dfa_state_count states, then the state array itself. A NULL array is a
// no-op.
void slex_free_dfa(DFAState* dfa_states, int dfa_state_count) {
    if (!dfa_states) {
        return;
    }

    int idx = 0;
    while (idx < dfa_state_count) {
        free(dfa_states[idx].nfa_states);
        idx++;
    }

    free(dfa_states);
}
|
||||||
Binary file not shown.
Reference in New Issue
Block a user