From 2950db1efb93a5c1f30405632e62dba8d1eac716 Mon Sep 17 00:00:00 2001 From: Creeper Lv Date: Tue, 26 May 2026 04:45:32 +1000 Subject: [PATCH] Finished the implemenation with Antigravity. --- .gitignore | 1 + Headers/slex_regex.h | 38 ++ Source/Languages/C/slex_template.c | 317 ++++++++++- Source/Languages/CSharp/slex_template.c | 256 ++++++++- Source/cli/main.c | 184 +++++++ Source/slex.c | 15 +- Source/slex_parser.c | 249 +++++++++ Source/slex_regex.c | 702 ++++++++++++++++++++++++ bin/slex.exe | Bin 50970 -> 0 bytes env.sh | 3 + 10 files changed, 1760 insertions(+), 5 deletions(-) create mode 100644 .gitignore create mode 100644 Headers/slex_regex.h create mode 100644 Source/slex_parser.c create mode 100644 Source/slex_regex.c delete mode 100644 bin/slex.exe create mode 100644 env.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e660fd9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +bin/ diff --git a/Headers/slex_regex.h b/Headers/slex_regex.h new file mode 100644 index 0000000..9487cb5 --- /dev/null +++ b/Headers/slex_regex.h @@ -0,0 +1,38 @@ +#ifndef SLEX_REGEX_H +#define SLEX_REGEX_H + +#include +#include + +// Represents an NFA state +typedef struct NFAState { + int id; + bool is_epsilon; + bool char_set[256]; + struct NFAState* edge1; + struct NFAState* edge2; + int accept_rule_index; // -1 if not accepting, >= 0 for rule index (highest priority is lowest index) +} NFAState; + +// Represents an NFA fragment (start and accept states) +typedef struct NFAFragment { + NFAState* start; + NFAState* accept; +} NFAFragment; + +// Represents a DFA state +typedef struct DFAState { + int id; + int* nfa_states; // Sorted list of NFA state IDs that make up this DFA state + int nfa_state_count; + int transitions[256]; // DFA state transitions for each character (-1 if no transition) + int accept_rule_index; // -1 if not accepting, >= 0 if accepting (stores rule index) +} DFAState; + +// Compiles a set of regular expression patterns into a minimized/complete DFA +DFAState* slex_compile_regexes(char** patterns, int pattern_count, int* dfa_state_count_out); + +// Frees all DFA states allocated by slex_compile_regexes +void slex_free_dfa(DFAState* dfa_states, int dfa_state_count); + +#endif diff --git a/Source/Languages/C/slex_template.c b/Source/Languages/C/slex_template.c index a966708..3d0bc7d 100644 --- a/Source/Languages/C/slex_template.c +++ b/Source/Languages/C/slex_template.c @@ -1,7 +1,320 @@ #include "../../../Headers/slex_core.h" +#include "../../../Headers/slex_regex.h" +#include +#include +#include + +// Helper to check if a tag is mapped to an ID, and return it. +static const char* get_mapped_id(slex_rules* rules, const char* tag) { + for (uint64_t i = 0; i < rules->mapping_count; i++) { + if (strcmp(rules->mappings[i].Tag, tag) == 0) { + return rules->mappings[i].Id; + } + } + return NULL; +} + +static void generate_declarations(FILE* f, slex_options* options, slex_rules* rules) { + char* prefix = options->prefix ? options->prefix : "slex_"; + char* data_type = options->data_type_name ? options->data_type_name : "slex_segment"; + + fprintf(f, "#include \n"); + fprintf(f, "#include \n\n"); + + // slex_segment_tag enum + fprintf(f, "typedef enum %ssegment_tag {\n", prefix); + for (uint64_t i = 0; i < rules->rule_count; i++) { + fprintf(f, " %stag_%s,\n", prefix, rules->rules[i].Tag); + } + fprintf(f, "} %ssegment_tag;\n\n", prefix); + + // slex_segment_id enum + fprintf(f, "typedef enum %ssegment_id {\n", prefix); + fprintf(f, " %sid_default = 0,\n", prefix); + // Find all unique mapping Ids + for (uint64_t i = 0; i < rules->mapping_count; i++) { + // Only print if not already printed (unique check) + bool unique = true; + for (uint64_t j = 0; j < i; j++) { + if (strcmp(rules->mappings[j].Id, rules->mappings[i].Id) == 0) { + unique = false; + break; + } + } + if (unique) { + fprintf(f, " %sid_%s,\n", prefix, rules->mappings[i].Id); + } + } + fprintf(f, "} %ssegment_id;\n\n", prefix); + + // slex_segment struct + fprintf(f, "typedef struct %s {\n", data_type); + fprintf(f, " char* head;\n"); + fprintf(f, " int64_t length;\n"); + fprintf(f, " char* file_name;\n"); + fprintf(f, " int64_t line;\n"); + fprintf(f, " int64_t col;\n"); + fprintf(f, " %ssegment_tag tag;\n", prefix); + fprintf(f, " %ssegment_id id;\n", prefix); + fprintf(f, " struct %s* prev;\n", data_type); + fprintf(f, " struct %s* next;\n", data_type); + fprintf(f, "} %s;\n\n", data_type); + + // post_process_result enum + fprintf(f, "typedef enum %spost_process_result {\n", prefix); + fprintf(f, " %scontinue,\n", prefix); + fprintf(f, " %sskip,\n", prefix); + fprintf(f, " %scontinue_with_output,\n", prefix); + fprintf(f, "} %spost_process_result;\n\n", prefix); + + // Function declarations + fprintf(f, "%spost_process_result %spost_process(%s* input, %s** output);\n", prefix, prefix, data_type, data_type); + fprintf(f, "char %sfile(FILE* f, char* file_name, %s** head);\n", prefix, data_type); + fprintf(f, "char %scstr(char* input, char* file_name, %s** head);\n", prefix, data_type); + fprintf(f, "char %sfree(%s* head);\n\n", prefix, data_type); +} bool slex_translate_to_file_c(slex_options *options, slex_rules *rules, FILE *output_file) { - // TODO: Stub for moment. - return false; + if (!options || !rules || !output_file) return false; + + char* prefix = options->prefix ? options->prefix : "slex_"; + char* data_type = options->data_type_name ? options->data_type_name : "slex_segment"; + + // 1. Compile regexes to DFA + char** patterns = (char**)malloc(rules->rule_count * sizeof(char*)); + for (uint64_t i = 0; i < rules->rule_count; i++) { + patterns[i] = rules->rules[i].Pattern; + } + int dfa_state_count = 0; + DFAState* dfa = slex_compile_regexes(patterns, (int)rules->rule_count, &dfa_state_count); + free(patterns); + + if (!dfa) return false; + + // 2. Open header file if requested + FILE* header_f = NULL; + char* header_base = NULL; + if (options->header_output && strlen(options->header_output) > 0) { + header_f = fopen(options->header_output, "w"); + if (!header_f) { + slex_free_dfa(dfa, dfa_state_count); + return false; + } + + // Find basename of header_output to include it in the implementation + char* last_slash = strrchr(options->header_output, '/'); + char* last_backslash = strrchr(options->header_output, '\\'); + char* base = options->header_output; + if (last_slash && last_slash > base) base = last_slash + 1; + if (last_backslash && last_backslash > base) base = last_backslash + 1; + header_base = strdup(base); + + // Write header guard + fprintf(header_f, "#ifndef __SLEX_GENERATED_H__\n"); + fprintf(header_f, "#define __SLEX_GENERATED_H__\n\n"); + generate_declarations(header_f, options, rules); + fprintf(header_f, "#endif\n"); + fclose(header_f); + } + + // 3. Write implementation to output_file + fprintf(output_file, "#include \n"); + fprintf(output_file, "#include \n"); + fprintf(output_file, "#include \n"); + fprintf(output_file, "#include \n\n"); + + if (header_base) { + fprintf(output_file, "#include \"%s\"\n\n", header_base); + free(header_base); + } else { + // Output inline declarations + generate_declarations(output_file, options, rules); + } + + // Extract variables and post-processor code + char* variables = ""; + char* post_processor = ""; + for (uint64_t i = 0; i < rules->code_block_count; i++) { + if (rules->code_blocks[i].target_languge == c_language) { + if (rules->code_blocks[i].variables) { + variables = rules->code_blocks[i].variables; + } + if (rules->code_blocks[i].post_processor_code) { + post_processor = rules->code_blocks[i].post_processor_code; + } + break; + } + } + + // Write variables + if (strlen(variables) > 0) { + fprintf(output_file, "/* --- User Variables --- */\n%s\n/* ---------------------- */\n\n", variables); + } + + // Write slex_post_process function + fprintf(output_file, "%spost_process_result %spost_process(%s* input, %s** output) {\n", prefix, prefix, data_type, data_type); + if (strlen(post_processor) > 0) { + fprintf(output_file, "%s\n", post_processor); + } else { + // Default implementation + fprintf(output_file, " *output = input;\n"); + fprintf(output_file, " return %scontinue;\n", prefix); + } + fprintf(output_file, "}\n\n"); + + // Write transition table + fprintf(output_file, "static const int transitions[%d][256] = {\n", dfa_state_count); + for (int i = 0; i < dfa_state_count; i++) { + fprintf(output_file, " {"); + for (int c = 0; c < 256; c++) { + fprintf(output_file, "%d", dfa[i].transitions[c]); + if (c < 255) fprintf(output_file, ", "); + } + fprintf(output_file, "}"); + if (i < dfa_state_count - 1) fprintf(output_file, ",\n"); + else fprintf(output_file, "\n"); + } + fprintf(output_file, "};\n\n"); + + // Write accepting rules table + fprintf(output_file, "static const int accepting_rules[%d] = {\n ", dfa_state_count); + for (int i = 0; i < dfa_state_count; i++) { + fprintf(output_file, "%d", dfa[i].accept_rule_index); + if (i < dfa_state_count - 1) fprintf(output_file, ", "); + } + fprintf(output_file, "\n};\n\n"); + + // Write mapping mappings table/resolver + fprintf(output_file, "static void assign_tag_and_id(%s* node, int rule_idx) {\n", data_type); + fprintf(output_file, " switch(rule_idx) {\n"); + for (uint64_t i = 0; i < rules->rule_count; i++) { + fprintf(output_file, " case %d:\n", (int)i); + fprintf(output_file, " node->tag = %stag_%s;\n", prefix, rules->rules[i].Tag); + const char* mapped_id = get_mapped_id(rules, rules->rules[i].Tag); + if (mapped_id) { + fprintf(output_file, " node->id = %sid_%s;\n", prefix, mapped_id); + } else { + fprintf(output_file, " node->id = %sid_default;\n", prefix); + } + fprintf(output_file, " break;\n"); + } + fprintf(output_file, " }\n"); + fprintf(output_file, "}\n\n"); + + // Write slex_free function + fprintf(output_file, "char %sfree(%s* head) {\n", prefix, data_type); + fprintf(output_file, " while (head) {\n"); + fprintf(output_file, " %s* next = head->next;\n", data_type); + fprintf(output_file, " free(head->head);\n"); + fprintf(output_file, " if (head->file_name) free(head->file_name);\n"); + fprintf(output_file, " free(head);\n"); + fprintf(output_file, " head = next;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " return 1;\n"); + fprintf(output_file, "}\n\n"); + + // Write slex_cstr function (DFA matching loop) + fprintf(output_file, "char %scstr(char* input, char* file_name, %s** head) {\n", prefix, data_type); + fprintf(output_file, " if (!input || !head) return 0;\n"); + fprintf(output_file, " *head = NULL;\n"); + fprintf(output_file, " %s* tail = NULL;\n", data_type); + fprintf(output_file, " char* p = input;\n"); + fprintf(output_file, " int64_t current_line = 1;\n"); + fprintf(output_file, " int64_t current_col = 1;\n\n"); + fprintf(output_file, " while (*p != '\\0') {\n"); + fprintf(output_file, " int state = 0;\n"); + fprintf(output_file, " char* match_end = NULL;\n"); + fprintf(output_file, " int match_rule = -1;\n"); + fprintf(output_file, " int64_t token_line = current_line;\n"); + fprintf(output_file, " int64_t token_col = current_col;\n\n"); + fprintf(output_file, " char* curr_p = p;\n"); + fprintf(output_file, " while (*curr_p != '\\0') {\n"); + fprintf(output_file, " unsigned char c = (unsigned char)*curr_p;\n"); + fprintf(output_file, " int next_state = transitions[state][c];\n"); + fprintf(output_file, " if (next_state == -1) break;\n"); + fprintf(output_file, " state = next_state;\n"); + fprintf(output_file, " if (accepting_rules[state] != -1) {\n"); + fprintf(output_file, " match_end = curr_p + 1;\n"); + fprintf(output_file, " match_rule = accepting_rules[state];\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " curr_p++;\n"); + fprintf(output_file, " }\n\n"); + fprintf(output_file, " if (match_rule != -1 && match_end > p) {\n"); + fprintf(output_file, " int64_t len = match_end - p;\n"); + fprintf(output_file, " %s* node = (%s*)malloc(sizeof(%s));\n", data_type, data_type, data_type); + fprintf(output_file, " node->head = (char*)malloc(len + 1);\n"); + fprintf(output_file, " memcpy(node->head, p, len);\n"); + fprintf(output_file, " node->head[len] = '\\0';\n"); + fprintf(output_file, " node->length = len;\n"); + fprintf(output_file, " node->file_name = file_name ? strdup(file_name) : NULL;\n"); + fprintf(output_file, " node->line = token_line;\n"); + fprintf(output_file, " node->col = token_col;\n"); + fprintf(output_file, " node->prev = NULL;\n"); + fprintf(output_file, " node->next = NULL;\n"); + fprintf(output_file, " assign_tag_and_id(node, match_rule);\n\n"); + fprintf(output_file, " // Update line/col tracker\n"); + fprintf(output_file, " for (char* t = p; t < match_end; t++) {\n"); + fprintf(output_file, " if (*t == '\\n') {\n"); + fprintf(output_file, " current_line++;\n"); + fprintf(output_file, " current_col = 1;\n"); + fprintf(output_file, " } else {\n"); + fprintf(output_file, " current_col++;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + fprintf(output_file, " %s* output_node = NULL;\n", data_type); + fprintf(output_file, " %spost_process_result pr = %spost_process(node, &output_node);\n", prefix, prefix); + fprintf(output_file, " if (pr == %sskip) {\n", prefix); + fprintf(output_file, " free(node->head);\n"); + fprintf(output_file, " if (node->file_name) free(node->file_name);\n"); + fprintf(output_file, " free(node);\n"); + fprintf(output_file, " } else {\n"); + fprintf(output_file, " %s* to_append = (pr == %scontinue_with_output) ? output_node : node;\n", data_type, prefix); + fprintf(output_file, " if (to_append) {\n"); + fprintf(output_file, " if (!*head) {\n"); + fprintf(output_file, " *head = to_append;\n"); + fprintf(output_file, " tail = to_append;\n"); + fprintf(output_file, " } else {\n"); + fprintf(output_file, " tail->next = to_append;\n"); + fprintf(output_file, " to_append->prev = tail;\n"); + fprintf(output_file, " tail = to_append;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " p = match_end;\n"); + fprintf(output_file, " } else if (*p == ' ' || *p == '\\t' || *p == '\\r' || *p == '\\n') {\n"); + fprintf(output_file, " if (*p == '\\n') {\n"); + fprintf(output_file, " current_line++;\n"); + fprintf(output_file, " current_col = 1;\n"); + fprintf(output_file, " } else {\n"); + fprintf(output_file, " current_col++;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " p++;\n"); + fprintf(output_file, " } else {\n"); + fprintf(output_file, " fprintf(stderr, \"Lexical error at %%s:%%lld:%%lld near '%%c'\\n\", file_name ? file_name : \"\", (long long)token_line, (long long)token_col, *p);\n"); + fprintf(output_file, " %sfree(*head);\n", prefix); + fprintf(output_file, " *head = NULL;\n"); + fprintf(output_file, " return 0;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " return 1;\n"); + fprintf(output_file, "}\n\n"); + + // Write slex_file function + fprintf(output_file, "char %sfile(FILE* f, char* file_name, %s** head) {\n", prefix, data_type); + fprintf(output_file, " if (!f || !head) return 0;\n"); + fprintf(output_file, " fseek(f, 0, SEEK_END);\n"); + fprintf(output_file, " long size = ftell(f);\n"); + fprintf(output_file, " fseek(f, 0, SEEK_SET);\n\n"); + fprintf(output_file, " char* buf = (char*)malloc(size + 1);\n"); + fprintf(output_file, " if (!buf) return 0;\n"); + fprintf(output_file, " size_t read_bytes = fread(buf, 1, size, f);\n"); + fprintf(output_file, " buf[read_bytes] = '\\0';\n\n"); + fprintf(output_file, " char success = %scstr(buf, file_name, head);\n", prefix); + fprintf(output_file, " free(buf);\n"); + fprintf(output_file, " return success;\n"); + fprintf(output_file, "}\n"); + + slex_free_dfa(dfa, dfa_state_count); + return true; } \ No newline at end of file diff --git a/Source/Languages/CSharp/slex_template.c b/Source/Languages/CSharp/slex_template.c index a4f3669..d2a35a3 100644 --- a/Source/Languages/CSharp/slex_template.c +++ b/Source/Languages/CSharp/slex_template.c @@ -1,7 +1,259 @@ #include "../../../Headers/slex_core.h" +#include "../../../Headers/slex_regex.h" +#include +#include +#include + +// Helper to check if a tag is mapped to an ID, and return it. +static const char* get_mapped_id(slex_rules* rules, const char* tag) { + for (uint64_t i = 0; i < rules->mapping_count; i++) { + if (strcmp(rules->mappings[i].Tag, tag) == 0) { + return rules->mappings[i].Id; + } + } + return NULL; +} + +// Print a string as a safe C# verbatim string literal +static void print_verbatim_string(FILE* f, const char* str) { + fprintf(f, "@\""); + for (int i = 0; str[i] != '\0'; i++) { + if (str[i] == '"') { + fprintf(f, "\"\""); + } else { + fputc(str[i], f); + } + } + fprintf(f, "\""); +} bool slex_translate_to_file_csharp(slex_options *options, slex_rules *rules, FILE *output_file) { - // TODO: Stub for moment. - return false; + if (!options || !rules || !output_file) return false; + + char* ns_name = (options->namespace_name && strlen(options->namespace_name) > 0) ? options->namespace_name : "SLexGenerated"; + char* class_name = (options->class_name && strlen(options->class_name) > 0) ? options->class_name : "SLexer"; + char* data_type = (options->data_type_name && strlen(options->data_type_name) > 0) ? options->data_type_name : "Segment"; + char* prefix = options->prefix ? options->prefix : ""; + + // Extract C# code block variables & post_processor + char* variables = ""; + char* post_processor = ""; + for (uint64_t i = 0; i < rules->code_block_count; i++) { + if (rules->code_blocks[i].target_languge == csharp) { + if (rules->code_blocks[i].variables) { + variables = rules->code_blocks[i].variables; + } + if (rules->code_blocks[i].post_processor_code) { + post_processor = rules->code_blocks[i].post_processor_code; + } + break; + } + } + + // Generate file content + fprintf(output_file, "using System;\n"); + fprintf(output_file, "using System.IO;\n"); + fprintf(output_file, "using System.Text;\n"); + fprintf(output_file, "using System.Text.RegularExpressions;\n\n"); + + fprintf(output_file, "namespace %s\n{\n", ns_name); + + // SegmentTag enum + fprintf(output_file, " public enum %sTag\n {\n", data_type); + for (uint64_t i = 0; i < rules->rule_count; i++) { + fprintf(output_file, " %s,\n", rules->rules[i].Tag); + } + fprintf(output_file, " }\n\n"); + + // SegmentId enum + fprintf(output_file, " public enum %sId\n {\n", data_type); + fprintf(output_file, " Default = 0,\n"); + for (uint64_t i = 0; i < rules->mapping_count; i++) { + bool unique = true; + for (uint64_t j = 0; j < i; j++) { + if (strcmp(rules->mappings[j].Id, rules->mappings[i].Id) == 0) { + unique = false; + break; + } + } + if (unique) { + fprintf(output_file, " %s,\n", rules->mappings[i].Id); + } + } + fprintf(output_file, " }\n\n"); + + // PostProcessResult enum + fprintf(output_file, " public enum PostProcessResult\n {\n"); + fprintf(output_file, " Continue,\n"); + fprintf(output_file, " Skip,\n"); + fprintf(output_file, " ContinueWithOutput\n"); + fprintf(output_file, " }\n\n"); + + // Segment class + fprintf(output_file, " public class %s\n {\n", data_type); + fprintf(output_file, " public string Content { get; set; } = string.Empty;\n"); + fprintf(output_file, " public string FileName { get; set; } = string.Empty;\n"); + fprintf(output_file, " public %s? Prev { get; set; }\n", data_type); + fprintf(output_file, " public %s? Next { get; set; }\n", data_type); + fprintf(output_file, " public long Line { get; set; }\n"); + fprintf(output_file, " public long Column { get; set; }\n"); + fprintf(output_file, " public %sTag Tag { get; set; }\n", data_type); + fprintf(output_file, " public %sId Id { get; set; }\n", data_type); + fprintf(output_file, " }\n\n"); + + // SLexer class + fprintf(output_file, " public class %s\n {\n", class_name); + + // User variables + if (strlen(variables) > 0) { + fprintf(output_file, " // --- User Variables ---\n%s\n // ----------------------\n\n", variables); + } + + // slex_post_process method + fprintf(output_file, " private PostProcessResult slex_post_process(%s Input, out %s Output)\n {\n", data_type, data_type); + if (strlen(post_processor) > 0) { + fprintf(output_file, "%s\n", post_processor); + } else { + fprintf(output_file, " Output = Input;\n"); + fprintf(output_file, " return PostProcessResult.Continue;\n"); + } + fprintf(output_file, " }\n\n"); + + // Compiled System.Text.RegularExpressions.Regex rules using the \G anchor + fprintf(output_file, " private static readonly Regex[] Rules = new Regex[] {\n"); + for (uint64_t i = 0; i < rules->rule_count; i++) { + fprintf(output_file, " new Regex(@\"\\G\" + "); + print_verbatim_string(output_file, rules->rules[i].Pattern); + fprintf(output_file, ", RegexOptions.Compiled)"); + if (i < rules->rule_count - 1) fprintf(output_file, ",\n"); + else fprintf(output_file, "\n"); + } + fprintf(output_file, " };\n\n"); + + // Tag and ID assignment + fprintf(output_file, " private void AssignTagAndId(%s node, int ruleIdx)\n {\n", data_type); + fprintf(output_file, " switch (ruleIdx)\n {\n"); + for (uint64_t i = 0; i < rules->rule_count; i++) { + fprintf(output_file, " case %d:\n", (int)i); + fprintf(output_file, " node.Tag = %sTag.%s;\n", data_type, rules->rules[i].Tag); + const char* mapped_id = get_mapped_id(rules, rules->rules[i].Tag); + if (mapped_id) { + fprintf(output_file, " node.Id = %sId.%s;\n", data_type, mapped_id); + } else { + fprintf(output_file, " node.Id = %sId.Default;\n", data_type); + } + fprintf(output_file, " break;\n"); + } + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + + // SLex overloads + // 1. FileInfo + fprintf(output_file, " public bool %sSLex(FileInfo inputFile, out %s? Head)\n {\n", prefix, data_type); + fprintf(output_file, " if (inputFile == null)\n {\n"); + fprintf(output_file, " Head = null;\n"); + fprintf(output_file, " return false;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " using (var stream = inputFile.OpenRead())\n {\n"); + fprintf(output_file, " return %sSLex(stream, inputFile.FullName, out Head);\n", prefix); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + + // 2. Stream + fprintf(output_file, " public bool %sSLex(Stream inputStream, out %s? Head)\n {\n", prefix, data_type); + fprintf(output_file, " return %sSLex(inputStream, string.Empty, out Head);\n", prefix); + fprintf(output_file, " }\n\n"); + + // helper Stream with filename + fprintf(output_file, " public bool %sSLex(Stream inputStream, string fileName, out %s? Head)\n {\n", prefix, data_type); + fprintf(output_file, " if (inputStream == null)\n {\n"); + fprintf(output_file, " Head = null;\n"); + fprintf(output_file, " return false;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " using (var reader = new StreamReader(inputStream))\n {\n"); + fprintf(output_file, " return %sSLex(reader.ReadToEnd(), fileName, out Head);\n", prefix); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + + // 3. string + fprintf(output_file, " public bool %sSLex(string inputContent, out %s? Head)\n {\n", prefix, data_type); + fprintf(output_file, " return %sSLex(inputContent, string.Empty, out Head);\n", prefix); + fprintf(output_file, " }\n\n"); + + // Core matching loop: string with filename + fprintf(output_file, " public bool %sSLex(string inputContent, string fileName, out %s? Head)\n {\n", prefix, data_type); + fprintf(output_file, " Head = null;\n"); + fprintf(output_file, " if (inputContent == null) return false;\n\n"); + fprintf(output_file, " %s? head = null;\n", data_type); + fprintf(output_file, " %s? tail = null;\n", data_type); + fprintf(output_file, " int idx = 0;\n"); + fprintf(output_file, " int len = inputContent.Length;\n"); + fprintf(output_file, " long currentLine = 1;\n"); + fprintf(output_file, " long currentCol = 1;\n\n"); + fprintf(output_file, " while (idx < len)\n {\n"); + fprintf(output_file, " int bestRule = -1;\n"); + fprintf(output_file, " int bestLen = 0;\n"); + fprintf(output_file, " string bestVal = \"\";\n"); + fprintf(output_file, " long tokenLine = currentLine;\n"); + fprintf(output_file, " long tokenCol = currentCol;\n\n"); + fprintf(output_file, " for (int r = 0; r < Rules.Length; r++)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " var m = Rules[r].Match(inputContent, idx);\n"); + fprintf(output_file, " if (m.Success && m.Length > bestLen)\n"); + fprintf(output_file, " {\n"); + fprintf(output_file, " bestLen = m.Length;\n"); + fprintf(output_file, " bestRule = r;\n"); + fprintf(output_file, " bestVal = m.Value;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + fprintf(output_file, " if (bestRule != -1 && bestLen > 0)\n {\n"); + fprintf(output_file, " var node = new %s\n {\n", data_type); + fprintf(output_file, " Content = bestVal,\n"); + fprintf(output_file, " FileName = fileName,\n"); + fprintf(output_file, " Line = tokenLine,\n"); + fprintf(output_file, " Column = tokenCol\n"); + fprintf(output_file, " };\n"); + fprintf(output_file, " AssignTagAndId(node, bestRule);\n\n"); + fprintf(output_file, " // Update line/col tracker\n"); + fprintf(output_file, " for (int t = idx; t < idx + bestLen; t++)\n {\n"); + fprintf(output_file, " if (inputContent[t] == '\\n')\n {\n"); + fprintf(output_file, " currentLine++;\n"); + fprintf(output_file, " currentCol = 1;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " else\n {\n"); + fprintf(output_file, " currentCol++;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + fprintf(output_file, " %s? outputNode;\n", data_type); + fprintf(output_file, " PostProcessResult pr = slex_post_process(node, out outputNode);\n"); + fprintf(output_file, " if (pr != PostProcessResult.Skip)\n {\n"); + fprintf(output_file, " %s? toAppend = (pr == PostProcessResult.ContinueWithOutput) ? outputNode : node;\n", data_type); + fprintf(output_file, " if (toAppend != null)\n {\n"); + fprintf(output_file, " if (head == null)\n {\n"); + fprintf(output_file, " head = toAppend;\n"); + fprintf(output_file, " tail = toAppend;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " else\n {\n"); + fprintf(output_file, " tail.Next = toAppend;\n"); + fprintf(output_file, " toAppend.Prev = tail;\n"); + fprintf(output_file, " tail = toAppend;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " idx += bestLen;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " else\n {\n"); + fprintf(output_file, " Console.Error.WriteLine($\"Lexical error at {fileName}:{tokenLine}:{tokenCol} near '{inputContent[idx]}'\");\n"); + fprintf(output_file, " Head = null;\n"); + fprintf(output_file, " return false;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n\n"); + fprintf(output_file, " Head = head;\n"); + fprintf(output_file, " return true;\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, " }\n"); + fprintf(output_file, "}\n"); + + return true; } \ No newline at end of file diff --git a/Source/cli/main.c b/Source/cli/main.c index e29ad76..9d8bd19 100644 --- a/Source/cli/main.c +++ b/Source/cli/main.c @@ -1,6 +1,190 @@ #include "../../Headers/slex_core.h" +#include +#include +#include +#include + +static void print_usage(const char* prog_name) { + printf("Usage: %s [options] [options]\n\n", prog_name); + printf("Options:\n"); + printf(" -o Output file/output folder\n"); + printf(" -l Specify target language: c, c#, csharp (default: detected or c)\n"); + printf(" -h
Output header file (separates declarations and implementation for C)\n"); + printf(" -ns Specify namespace (C# only. Default: SLexGenerated)\n"); + printf(" -class Specify class name (C# only. Default: SLexer)\n"); + printf(" -prefix Specify prefix for functions/methods\n"); + printf(" -data_type Specify data type name of the segment\n"); +} int main(int ac, char **av) { + char* input_path = NULL; + char* output_path = NULL; + char* header_path = NULL; + char* lang_str = NULL; + char* ns_name = NULL; + char* class_name = NULL; + char* prefix = NULL; + char* data_type_name = NULL; + + for (int i = 1; i < ac; i++) { + if (av[i][0] == '-') { + if (strcmp(av[i], "-o") == 0) { + if (i + 1 < ac) output_path = av[++i]; + else { fprintf(stderr, "Error: -o option requires an argument\n"); return 1; } + } else if (strcmp(av[i], "-l") == 0) { + if (i + 1 < ac) lang_str = av[++i]; + else { fprintf(stderr, "Error: -l option requires an argument\n"); return 1; } + } else if (strcmp(av[i], "-h") == 0) { + if (i + 1 < ac) header_path = av[++i]; + else { fprintf(stderr, "Error: -h option requires an argument\n"); return 1; } + } else if (strcmp(av[i], "-ns") == 0) { + if (i + 1 < ac) ns_name = av[++i]; + else { fprintf(stderr, "Error: -ns option requires an argument\n"); return 1; } + } else if (strcmp(av[i], "-class") == 0) { + if (i + 1 < ac) class_name = av[++i]; + else { fprintf(stderr, "Error: -class option requires an argument\n"); return 1; } + } else if (strcmp(av[i], "-prefix") == 0) { + if (i + 1 < ac) prefix = av[++i]; + else { fprintf(stderr, "Error: -prefix option requires an argument\n"); return 1; } + } else if (strcmp(av[i], "-data_type") == 0) { + if (i + 1 < ac) data_type_name = av[++i]; + else { fprintf(stderr, "Error: -data_type option requires an argument\n"); return 1; } + } else { + fprintf(stderr, "Error: Unknown option %s\n", av[i]); + print_usage(av[0]); + return 1; + } + } else { + if (!input_path) { + input_path = av[i]; + } else { + fprintf(stderr, "Error: Multiple input files are not supported: %s\n", av[i]); + return 1; + } + } + } + + if (!input_path) { + fprintf(stderr, "Error: No input file specified\n"); + print_usage(av[0]); + return 1; + } + + // Determine target language + slex_target_language target_lang = c_language; + bool lang_detected = false; + + if (lang_str) { + char temp_lang[256]; + int l_len = (int)strlen(lang_str); + for (int k = 0; k < l_len && k < 255; k++) { + temp_lang[k] = (char)tolower((unsigned char)lang_str[k]); + } + temp_lang[l_len] = '\0'; + + if (strcmp(temp_lang, "c") == 0) { + target_lang = c_language; + lang_detected = true; + } else if (strcmp(temp_lang, "c#") == 0 || strcmp(temp_lang, "csharp") == 0) { + target_lang = csharp; + lang_detected = true; + } else { + fprintf(stderr, "Error: Unsupported language %s\n", lang_str); + return 1; + } + } else if (output_path) { + // Guess language from output path extension + char* ext = strrchr(output_path, '.'); + if (ext) { + if (strcmp(ext, ".cs") == 0) { + target_lang = csharp; + lang_detected = true; + } else if (strcmp(ext, ".c") == 0) { + target_lang = c_language; + lang_detected = true; + } + } + } + + if (!lang_detected) { + // Default to C if not specified and not detected + target_lang = c_language; + } + + // Initialize options with defaults + slex_options options; + memset(&options, 0, sizeof(options)); + options.target_language = target_lang; + options.header_output = header_path; + + if (target_lang == c_language) { + options.prefix = prefix ? prefix : "slex_"; + options.data_type_name = data_type_name ? data_type_name : "slex_segment"; + } else { + options.namespace_name = ns_name ? ns_name : "SLexGenerated"; + options.class_name = class_name ? class_name : "SLexer"; + options.prefix = prefix ? prefix : ""; + options.data_type_name = data_type_name ? data_type_name : "Segment"; + } + + // Load input rule file + FILE* in_f = fopen(input_path, "r"); + if (!in_f) { + fprintf(stderr, "Error: Failed to open input file %s\n", input_path); + return 1; + } + + slex_rules rules; + memset(&rules, 0, sizeof(rules)); + if (!slex_read_rule_from_file(in_f, &rules)) { + fprintf(stderr, "Error: Failed to parse rule file %s\n", input_path); + fclose(in_f); + return 1; + } + fclose(in_f); + + // Open output file + FILE* out_f = stdout; + if (output_path && strlen(output_path) > 0) { + out_f = fopen(output_path, "w"); + if (!out_f) { + fprintf(stderr, "Error: Failed to open output file %s\n", output_path); + return 1; + } + } + + // Perform translation + bool success = slex_translate_to_file(&options, &rules, out_f); + + if (out_f != stdout) { + fclose(out_f); + } + + // Free parsed rule structures + for (uint64_t i = 0; i < rules.rule_count; i++) { + free(rules.rules[i].Tag); + free(rules.rules[i].Pattern); + } + free(rules.rules); + + for (uint64_t i = 0; i < rules.mapping_count; i++) { + free(rules.mappings[i].Id); + free(rules.mappings[i].Tag); + } + free(rules.mappings); + + for (uint64_t i = 0; i < rules.code_block_count; i++) { + if (rules.code_blocks[i].post_processor_code) free(rules.code_blocks[i].post_processor_code); + if (rules.code_blocks[i].variables) free(rules.code_blocks[i].variables); + } + free(rules.code_blocks); + + if (!success) { + fprintf(stderr, "Error: Code generation failed\n"); + return 1; + } + + printf("Success: Generated lexer source code\n"); return 0; } \ No newline at end of file diff --git a/Source/slex.c b/Source/slex.c index 7972179..629efdc 100644 --- a/Source/slex.c +++ b/Source/slex.c @@ -1 +1,14 @@ -#include "../Headers/slex_core.h" \ No newline at end of file +#include "../Headers/slex_core.h" +#include + +bool slex_translate_to_file(slex_options *options, slex_rules *rules, FILE *output_file) +{ + if (!options || !rules || !output_file) return false; + + if (options->target_language == c_language) { + return slex_translate_to_file_c(options, rules, output_file); + } else if (options->target_language == csharp) { + return slex_translate_to_file_csharp(options, rules, output_file); + } + return false; +} \ No newline at end of file diff --git a/Source/slex_parser.c b/Source/slex_parser.c new file mode 100644 index 0000000..f1cc6ae --- /dev/null +++ b/Source/slex_parser.c @@ -0,0 +1,249 @@ +#include "../Headers/slex_core.h" +#include +#include +#include +#include + +// Helper: Trim leading and trailing whitespace +static char* trim_whitespace(char* str) { + while (isspace((unsigned char)*str)) { + str++; + } + if (*str == '\0') { + return str; + } + char* end = str + strlen(str) - 1; + while (end > str && isspace((unsigned char)*end)) { + end--; + } + *(end + 1) = '\0'; + return str; +} + +// Helper: Get next line from buffer +static bool get_next_line(char** cursor, char* line_buf, int max_len) { + char* c = *cursor; + if (*c == '\0') return false; + int idx = 0; + while (*c != '\0' && *c != '\n' && *c != '\r' && idx < max_len - 1) { + line_buf[idx++] = *c++; + } + line_buf[idx] = '\0'; + + // Skip newline characters + if (*c == '\r') c++; + if (*c == '\n') c++; + + *cursor = c; + return true; +} + +bool slex_read_rule_from_cstr(char *content, slex_rules *output_rule) { + if (!content || !output_rule) return false; + + // Initialize output structure + output_rule->rules = NULL; + output_rule->rule_count = 0; + output_rule->mappings = NULL; + output_rule->mapping_count = 0; + output_rule->code_blocks = NULL; + output_rule->code_block_count = 0; + + typedef enum { + STATE_NONE, + STATE_RULE, + STATE_MAPPING, + STATE_CODE + } ParserState; + + ParserState state = STATE_NONE; + slex_target_language current_lang = c_language; + bool has_lang = false; + + char* cursor = content; + char line[4096]; + + while (get_next_line(&cursor, line, sizeof(line))) { + char* trimmed = trim_whitespace(line); + + // Skip comments and empty lines + if (trimmed[0] == '\0' || trimmed[0] == '#' || (trimmed[0] == '/' && trimmed[1] == '/')) { + continue; + } + + // Section switches + if (strcmp(trimmed, "rule:") == 0) { + state = STATE_RULE; + continue; + } else if (strcmp(trimmed, "mapping:") == 0) { + state = STATE_MAPPING; + continue; + } else if (strcmp(trimmed, "code:") == 0) { + state = STATE_CODE; + continue; + } + + if (state == STATE_RULE) { + // Split into and + // Tag is first space-delimited token + char* tag = trimmed; + char* pattern = trimmed; + while (*pattern != '\0' && !isspace((unsigned char)*pattern)) { + pattern++; + } + if (*pattern != '\0') { + *pattern = '\0'; + pattern++; + pattern = trim_whitespace(pattern); + } + + if (strlen(tag) > 0 && strlen(pattern) > 0) { + output_rule->rule_count++; + output_rule->rules = (slex_rule*)realloc(output_rule->rules, output_rule->rule_count * sizeof(slex_rule)); + output_rule->rules[output_rule->rule_count - 1].Tag = strdup(tag); + output_rule->rules[output_rule->rule_count - 1].Pattern = strdup(pattern); + } + } else if (state == STATE_MAPPING) { + // Split into and + char* id = trimmed; + char* tag = trimmed; + while (*tag != '\0' && !isspace((unsigned char)*tag)) { + tag++; + } + if (*tag != '\0') { + *tag = '\0'; + tag++; + tag = trim_whitespace(tag); + } + + if (strlen(id) > 0 && strlen(tag) > 0) { + output_rule->mapping_count++; + output_rule->mappings = (slex_mapping*)realloc(output_rule->mappings, output_rule->mapping_count * sizeof(slex_mapping)); + output_rule->mappings[output_rule->mapping_count - 1].Id = strdup(id); + output_rule->mappings[output_rule->mapping_count - 1].Tag = strdup(tag); + } + } else if (state == STATE_CODE) { + int len = (int)strlen(trimmed); + if (trimmed[0] == '%' && trimmed[len - 1] == '%') { + // Language definition block like %c% or %c#% or %csharp% + char lang_name[256]; + strncpy(lang_name, trimmed + 1, len - 2); + lang_name[len - 2] = '\0'; + char* trimmed_lang = trim_whitespace(lang_name); + + if (strcmp(trimmed_lang, "c") == 0) { + current_lang = c_language; + has_lang = true; + } else if (strcmp(trimmed_lang, "c#") == 0 || strcmp(trimmed_lang, "csharp") == 0) { + current_lang = csharp; + has_lang = true; + } else { + has_lang = false; + } + } else if (has_lang && strcmp(trimmed, "%post_processor") == 0) { + // Read all lines until "post_processor%" + int cap = 4096; + char* code = (char*)malloc(cap); + code[0] = '\0'; + int code_len = 0; + + char code_line[4096]; + while (get_next_line(&cursor, code_line, sizeof(code_line))) { + char* trimmed_code = trim_whitespace(code_line); + if (strcmp(trimmed_code, "post_processor%") == 0) { + break; + } + int line_len = (int)strlen(code_line); + if (code_len + line_len + 2 >= cap) { + cap *= 2; + code = (char*)realloc(code, cap); + } + strcat(code, code_line); + strcat(code, "\n"); + code_len += line_len + 1; + } + + // Add or update code block + int block_idx = -1; + for (uint64_t i = 0; i < output_rule->code_block_count; i++) { + if (output_rule->code_blocks[i].target_languge == current_lang) { + block_idx = (int)i; + break; + } + } + + if (block_idx == -1) { + output_rule->code_block_count++; + output_rule->code_blocks = (code_block*)realloc(output_rule->code_blocks, output_rule->code_block_count * sizeof(code_block)); + block_idx = (int)output_rule->code_block_count - 1; + output_rule->code_blocks[block_idx].target_languge = current_lang; + output_rule->code_blocks[block_idx].post_processor_code = NULL; + output_rule->code_blocks[block_idx].variables = NULL; + } + output_rule->code_blocks[block_idx].post_processor_code = code; + } else if (has_lang && strcmp(trimmed, "%variables") == 0) { + // Read all lines until "variables%" + int cap = 4096; + char* vars = (char*)malloc(cap); + vars[0] = '\0'; + int vars_len = 0; + + char vars_line[4096]; + while (get_next_line(&cursor, vars_line, sizeof(vars_line))) { + char* trimmed_vars = trim_whitespace(vars_line); + if (strcmp(trimmed_vars, "variables%") == 0) { + break; + } + int line_len = (int)strlen(vars_line); + if (vars_len + line_len + 2 >= cap) { + cap *= 2; + vars = (char*)realloc(vars, cap); + } + strcat(vars, vars_line); + strcat(vars, "\n"); + vars_len += line_len + 1; + } + + // Add or update code block + int block_idx = -1; + for (uint64_t i = 0; i < output_rule->code_block_count; i++) { + if (output_rule->code_blocks[i].target_languge == current_lang) { + block_idx = (int)i; + break; + } + } + + if (block_idx == -1) { + output_rule->code_block_count++; + output_rule->code_blocks = (code_block*)realloc(output_rule->code_blocks, output_rule->code_block_count * sizeof(code_block)); + block_idx = (int)output_rule->code_block_count - 1; + output_rule->code_blocks[block_idx].target_languge = current_lang; + output_rule->code_blocks[block_idx].post_processor_code = NULL; + output_rule->code_blocks[block_idx].variables = NULL; + } + output_rule->code_blocks[block_idx].variables = vars; + } + } + } + + return true; +} + +bool slex_read_rule_from_file(FILE *f, slex_rules *output_rule) { + if (!f || !output_rule) return false; + + // Determine file size + fseek(f, 0, SEEK_END); + long size = ftell(f); + fseek(f, 0, SEEK_SET); + + char* content = (char*)malloc(size + 1); + if (!content) return false; + + size_t read_bytes = fread(content, 1, size, f); + content[read_bytes] = '\0'; + + bool success = slex_read_rule_from_cstr(content, output_rule); + free(content); + return success; +} diff --git a/Source/slex_regex.c b/Source/slex_regex.c new file mode 100644 index 0000000..9aa3819 --- /dev/null +++ b/Source/slex_regex.c @@ -0,0 +1,702 @@ +#include "../Headers/slex_regex.h" +#include +#include +#include + +// Token representation for Regex Parsing +typedef enum { + TOKEN_CHAR, + TOKEN_CHAR_SET, + TOKEN_CONCAT, + TOKEN_ALT, + TOKEN_STAR, + TOKEN_PLUS, + TOKEN_QUESTION, + TOKEN_LPAREN, + TOKEN_RPAREN, +} RegexTokenType; + +typedef struct { + RegexTokenType type; + bool char_set[256]; +} RegexToken; + +// Global array to track all allocated NFA states for easy deallocation +static NFAState** g_nfa_states = NULL; +static int g_nfa_state_count = 0; +static int g_nfa_state_capacity = 0; + +static NFAState* create_nfa_state() { + NFAState* s = (NFAState*)malloc(sizeof(NFAState)); + s->id = g_nfa_state_count; + s->is_epsilon = false; + memset(s->char_set, 0, sizeof(s->char_set)); + s->edge1 = NULL; + s->edge2 = NULL; + s->accept_rule_index = -1; + + // Track state globally + if (g_nfa_state_count >= g_nfa_state_capacity) { + g_nfa_state_capacity = g_nfa_state_capacity == 0 ? 1024 : g_nfa_state_capacity * 2; + g_nfa_states = (NFAState**)realloc(g_nfa_states, g_nfa_state_capacity * sizeof(NFAState*)); + } + g_nfa_states[g_nfa_state_count++] = s; + return s; +} + +static void free_all_nfa_states() { + for (int i = 0; i < g_nfa_state_count; i++) { + free(g_nfa_states[i]); + } + free(g_nfa_states); + g_nfa_states = NULL; + g_nfa_state_count = 0; + g_nfa_state_capacity = 0; +} + +// Tokenize a regex pattern +static RegexToken* tokenize_regex(const char* pattern, int* token_count_out) { + int capacity = 128; + int count = 0; + RegexToken* tokens = (RegexToken*)malloc(capacity * sizeof(RegexToken)); + int len = (int)strlen(pattern); + int idx = 0; + + while (idx < len) { + if (count >= capacity) { + capacity *= 2; + tokens = (RegexToken*)realloc(tokens, capacity * sizeof(RegexToken)); + } + + char c = pattern[idx]; + + if (c == '\\') { + idx++; + if (idx >= len) { + // Trailing backslash, treat as literal backslash + tokens[count].type = TOKEN_CHAR; + memset(tokens[count].char_set, 0, 256); + tokens[count].char_set[(unsigned char)'\\'] = true; + count++; + break; + } + char esc = pattern[idx++]; + tokens[count].type = TOKEN_CHAR_SET; + memset(tokens[count].char_set, 0, 256); + + if (esc == 'p' && idx < len && pattern[idx] == '{') { + idx++; // skip '{' + char prop[256]; + int p_idx = 0; + while (idx < len && pattern[idx] != '}') { + prop[p_idx++] = pattern[idx++]; + } + prop[p_idx] = '\0'; + if (idx < len && pattern[idx] == '}') { + idx++; // skip '}' + } + + if (strcmp(prop, "P") == 0) { + const char* punct = "!\"#%&'()*,-./:;?@[\\]_{}"; + for (int k = 0; punct[k] != '\0'; k++) { + tokens[count].char_set[(unsigned char)punct[k]] = true; + } + } else if (strcmp(prop, "S") == 0) { + const char* sym = "$+<=>^`|~"; + for (int k = 0; sym[k] != '\0'; k++) { + tokens[count].char_set[(unsigned char)sym[k]] = true; + } + } else if (strcmp(prop, "L") == 0) { + for (int d = 'a'; d <= 'z'; d++) tokens[count].char_set[d] = true; + for (int d = 'A'; d <= 'Z'; d++) tokens[count].char_set[d] = true; + } else if (strcmp(prop, "N") == 0) { + for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true; + } + } else if (esc == 'n') { + tokens[count].char_set[10] = true; // LF + } else if (esc == 't') { + tokens[count].char_set[9] = true; // TAB + } else if (esc == 'r') { + tokens[count].char_set[13] = true; // CR + } else if (esc == 's') { + tokens[count].char_set[32] = true; // Space + tokens[count].char_set[9] = true; // TAB + tokens[count].char_set[13] = true; // CR + tokens[count].char_set[10] = true; // LF + } else if (esc == 'd') { + for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true; + } else if (esc == 'w') { + for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true; + for (int d = 'a'; d <= 'z'; d++) tokens[count].char_set[d] = true; + for (int d = 'A'; d <= 'Z'; d++) tokens[count].char_set[d] = true; + tokens[count].char_set[(unsigned char)'_'] = true; + } else { + // Literal escaped character + tokens[count].type = TOKEN_CHAR; + tokens[count].char_set[(unsigned char)esc] = true; + } + count++; + } else if (c == '[') { + idx++; + bool negate = false; + if (idx < len && pattern[idx] == '^') { + negate = true; + idx++; + } + + tokens[count].type = TOKEN_CHAR_SET; + memset(tokens[count].char_set, 0, 256); + + while (idx < len && pattern[idx] != ']') { + char c1 = pattern[idx++]; + if (c1 == '\\' && idx < len) { + char esc = pattern[idx++]; + if (esc == 'p' && idx < len && pattern[idx] == '{') { + idx++; // skip '{' + char prop[256]; + int p_idx = 0; + while (idx < len && pattern[idx] != '}') { + prop[p_idx++] = pattern[idx++]; + } + prop[p_idx] = '\0'; + if (idx < len && pattern[idx] == '}') { + idx++; // skip '}' + } + + if (strcmp(prop, "P") == 0) { + const char* punct = "!\"#%&'()*,-./:;?@[\\]_{}"; + for (int k = 0; punct[k] != '\0'; k++) { + tokens[count].char_set[(unsigned char)punct[k]] = true; + } + } else if (strcmp(prop, "S") == 0) { + const char* sym = "$+<=>^`|~"; + for (int k = 0; sym[k] != '\0'; k++) { + tokens[count].char_set[(unsigned char)sym[k]] = true; + } + } else if (strcmp(prop, "L") == 0) { + for (int d = 'a'; d <= 'z'; d++) tokens[count].char_set[d] = true; + for (int d = 'A'; d <= 'Z'; d++) tokens[count].char_set[d] = true; + } else if (strcmp(prop, "N") == 0) { + for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true; + } + continue; + } else if (esc == 'n') c1 = '\n'; + else if (esc == 't') c1 = '\t'; + else if (esc == 'r') c1 = '\r'; + else if (esc == 's') { + tokens[count].char_set[32] = true; + tokens[count].char_set[9] = true; + tokens[count].char_set[13] = true; + tokens[count].char_set[10] = true; + continue; + } else if (esc == 'd') { + for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true; + continue; + } else if (esc == 'w') { + for (int d = '0'; d <= '9'; d++) tokens[count].char_set[d] = true; + for (int d = 'a'; d <= 'z'; d++) tokens[count].char_set[d] = true; + for (int d = 'A'; d <= 'Z'; d++) tokens[count].char_set[d] = true; + tokens[count].char_set[(unsigned char)'_'] = true; + continue; + } else { + c1 = esc; + } + } + + // Check range: c1-c2 + if (idx + 1 < len && pattern[idx] == '-' && pattern[idx + 1] != ']') { + idx++; // skip '-' + char c2 = pattern[idx++]; + if (c2 == '\\' && idx < len) { + char esc = pattern[idx++]; + if (esc == 'n') c2 = '\n'; + else if (esc == 't') c2 = '\t'; + else if (esc == 'r') c2 = '\r'; + else c2 = esc; + } + for (int r = (unsigned char)c1; r <= (unsigned char)c2; r++) { + tokens[count].char_set[r] = true; + } + } else { + tokens[count].char_set[(unsigned char)c1] = true; + } + } + + if (idx < len && pattern[idx] == ']') { + idx++; + } + + if (negate) { + for (int i = 0; i < 256; i++) { + tokens[count].char_set[i] = !tokens[count].char_set[i]; + } + } + count++; + } else if (c == '.') { + tokens[count].type = TOKEN_CHAR_SET; + memset(tokens[count].char_set, 0, 256); + for (int i = 0; i < 256; i++) { + if (i != 10) { // any character except newline + tokens[count].char_set[i] = true; + } + } + count++; + idx++; + } else if (c == '*') { + tokens[count].type = TOKEN_STAR; + count++; + idx++; + } else if (c == '+') { + tokens[count].type = TOKEN_PLUS; + count++; + idx++; + } else if (c == '?') { + tokens[count].type = TOKEN_QUESTION; + count++; + idx++; + } else if (c == '|') { + tokens[count].type = TOKEN_ALT; + count++; + idx++; + } else if (c == '(') { + tokens[count].type = TOKEN_LPAREN; + count++; + idx++; + } else if (c == ')') { + tokens[count].type = TOKEN_RPAREN; + count++; + idx++; + } else { + tokens[count].type = TOKEN_CHAR; + memset(tokens[count].char_set, 0, 256); + tokens[count].char_set[(unsigned char)c] = true; + count++; + idx++; + } + } + + *token_count_out = count; + return tokens; +} + +// Insert explicit concatenation operators +static RegexToken* insert_concat(RegexToken* input, int input_count, int* output_count_out) { + int capacity = input_count * 2; + int count = 0; + RegexToken* output = (RegexToken*)malloc(capacity * sizeof(RegexToken)); + + for (int i = 0; i < input_count; i++) { + if (count >= capacity) { + capacity *= 2; + output = (RegexToken*)realloc(output, capacity * sizeof(RegexToken)); + } + + output[count++] = input[i]; + + if (i + 1 < input_count) { + RegexTokenType t1 = input[i].type; + RegexTokenType t2 = input[i + 1].type; + + bool t1_can_concat = (t1 == TOKEN_CHAR || t1 == TOKEN_CHAR_SET || t1 == TOKEN_STAR || t1 == TOKEN_PLUS || t1 == TOKEN_QUESTION || t1 == TOKEN_RPAREN); + bool t2_can_concat = (t2 == TOKEN_CHAR || t2 == TOKEN_CHAR_SET || t2 == TOKEN_LPAREN); + + if (t1_can_concat && t2_can_concat) { + if (count >= capacity) { + capacity *= 2; + output = (RegexToken*)realloc(output, capacity * sizeof(RegexToken)); + } + output[count].type = TOKEN_CONCAT; + memset(output[count].char_set, 0, 256); + count++; + } + } + } + + *output_count_out = count; + return output; +} + +// Shunting-yard algorithm to convert infix tokens to postfix tokens +static RegexToken* infix_to_postfix(RegexToken* infix, int infix_count, int* postfix_count_out) { + int capacity = infix_count; + int postfix_count = 0; + RegexToken* postfix = (RegexToken*)malloc(capacity * sizeof(RegexToken)); + + RegexToken stack[512]; + int stack_top = 0; + + for (int i = 0; i < infix_count; i++) { + RegexToken t = infix[i]; + + if (t.type == TOKEN_CHAR || t.type == TOKEN_CHAR_SET) { + if (postfix_count >= capacity) { + capacity *= 2; + postfix = (RegexToken*)realloc(postfix, capacity * sizeof(RegexToken)); + } + postfix[postfix_count++] = t; + } else if (t.type == TOKEN_LPAREN) { + stack[stack_top++] = t; + } else if (t.type == TOKEN_RPAREN) { + while (stack_top > 0 && stack[stack_top - 1].type != TOKEN_LPAREN) { + if (postfix_count >= capacity) { + capacity *= 2; + postfix = (RegexToken*)realloc(postfix, capacity * sizeof(RegexToken)); + } + postfix[postfix_count++] = stack[--stack_top]; + } + if (stack_top > 0) { + stack_top--; // pop LPAREN + } + } else if (t.type == TOKEN_STAR || t.type == TOKEN_PLUS || t.type == TOKEN_QUESTION) { + // Unary operators have highest precedence and are postfix, output immediately + if (postfix_count >= capacity) { + capacity *= 2; + postfix = (RegexToken*)realloc(postfix, capacity * sizeof(RegexToken)); + } + postfix[postfix_count++] = t; + } else { + // Binary operators (CONCAT, ALT) + int p_curr = (t.type == TOKEN_ALT) ? 1 : 2; + while (stack_top > 0) { + RegexTokenType top_type = stack[stack_top - 1].type; + if (top_type == TOKEN_CONCAT || top_type == TOKEN_ALT) { + int p_top = (top_type == TOKEN_ALT) ? 1 : 2; + if (p_top >= p_curr) { + if (postfix_count >= capacity) { + capacity *= 2; + postfix = (RegexToken*)realloc(postfix, capacity * sizeof(RegexToken)); + } + postfix[postfix_count++] = stack[--stack_top]; + } else { + break; + } + } else { + break; + } + } + stack[stack_top++] = t; + } + } + + while (stack_top > 0) { + if (postfix_count >= capacity) { + capacity *= 2; + postfix = (RegexToken*)realloc(postfix, capacity * sizeof(RegexToken)); + } + postfix[postfix_count++] = stack[--stack_top]; + } + + *postfix_count_out = postfix_count; + return postfix; +} + +// Build NFA from postfix tokens using Thompson's construction +static NFAFragment build_nfa(RegexToken* postfix, int postfix_count) { + NFAFragment stack[512]; + int stack_top = 0; + + for (int i = 0; i < postfix_count; i++) { + RegexToken t = postfix[i]; + + if (t.type == TOKEN_CHAR || t.type == TOKEN_CHAR_SET) { + NFAState* start = create_nfa_state(); + NFAState* accept = create_nfa_state(); + start->is_epsilon = false; + memcpy(start->char_set, t.char_set, 256); + start->edge1 = accept; + + NFAFragment frag = {start, accept}; + stack[stack_top++] = frag; + } else if (t.type == TOKEN_CONCAT) { + NFAFragment f2 = stack[--stack_top]; + NFAFragment f1 = stack[--stack_top]; + + f1.accept->is_epsilon = true; + f1.accept->edge1 = f2.start; + + NFAFragment frag = {f1.start, f2.accept}; + stack[stack_top++] = frag; + } else if (t.type == TOKEN_ALT) { + NFAFragment f2 = stack[--stack_top]; + NFAFragment f1 = stack[--stack_top]; + + NFAState* start = create_nfa_state(); + NFAState* accept = create_nfa_state(); + + start->is_epsilon = true; + start->edge1 = f1.start; + start->edge2 = f2.start; + + f1.accept->is_epsilon = true; + f1.accept->edge1 = accept; + + f2.accept->is_epsilon = true; + f2.accept->edge1 = accept; + + NFAFragment frag = {start, accept}; + stack[stack_top++] = frag; + } else if (t.type == TOKEN_STAR) { + NFAFragment f1 = stack[--stack_top]; + + NFAState* start = create_nfa_state(); + NFAState* accept = create_nfa_state(); + + start->is_epsilon = true; + start->edge1 = f1.start; + start->edge2 = accept; + + f1.accept->is_epsilon = true; + f1.accept->edge1 = f1.start; + f1.accept->edge2 = accept; + + NFAFragment frag = {start, accept}; + stack[stack_top++] = frag; + } else if (t.type == TOKEN_PLUS) { + NFAFragment f1 = stack[--stack_top]; + + NFAState* start = create_nfa_state(); + NFAState* accept = create_nfa_state(); + + start->is_epsilon = true; + start->edge1 = f1.start; + + f1.accept->is_epsilon = true; + f1.accept->edge1 = f1.start; + f1.accept->edge2 = accept; + + NFAFragment frag = {start, accept}; + stack[stack_top++] = frag; + } else if (t.type == TOKEN_QUESTION) { + NFAFragment f1 = stack[--stack_top]; + + NFAState* start = create_nfa_state(); + NFAState* accept = create_nfa_state(); + + start->is_epsilon = true; + start->edge1 = f1.start; + start->edge2 = accept; + + f1.accept->is_epsilon = true; + f1.accept->edge1 = accept; + + NFAFragment frag = {start, accept}; + stack[stack_top++] = frag; + } + } + + return stack[0]; +} + +// Computes epsilon closure of a set of NFA states +static void get_epsilon_closure(int* input_states, int input_count, NFAState** all_nfa_states, int total_nfa_states, int** output_states, int* output_count) { + bool* visited = (bool*)calloc(total_nfa_states, sizeof(bool)); + int* queue = (int*)malloc(total_nfa_states * sizeof(int)); + int head = 0, tail = 0; + + for (int i = 0; i < input_count; i++) { + int id = input_states[i]; + visited[id] = true; + queue[tail++] = id; + } + + while (head < tail) { + int curr_id = queue[head++]; + NFAState* s = all_nfa_states[curr_id]; + if (s->is_epsilon) { + if (s->edge1 && !visited[s->edge1->id]) { + visited[s->edge1->id] = true; + queue[tail++] = s->edge1->id; + } + if (s->edge2 && !visited[s->edge2->id]) { + visited[s->edge2->id] = true; + queue[tail++] = s->edge2->id; + } + } + } + + int count = 0; + for (int i = 0; i < total_nfa_states; i++) { + if (visited[i]) count++; + } + + int* res = (int*)malloc(count * sizeof(int)); + int idx = 0; + for (int i = 0; i < total_nfa_states; i++) { + if (visited[i]) { + res[idx++] = i; + } + } + + *output_states = res; + *output_count = count; + free(visited); + free(queue); +} + +// Compare two NFA state sets +static bool are_nfa_sets_equal(int* a, int a_count, int* b, int b_count) { + if (a_count != b_count) return false; + for (int i = 0; i < a_count; i++) { + if (a[i] != b[i]) return false; + } + return true; +} + +// Compiles a set of regular expression patterns into a complete DFA using subset construction +DFAState* slex_compile_regexes(char** patterns, int pattern_count, int* dfa_state_count_out) { + free_all_nfa_states(); // Reset global state tracker + + // 1. Build NFA for each pattern + NFAFragment* fragments = (NFAFragment*)malloc(pattern_count * sizeof(NFAFragment)); + for (int i = 0; i < pattern_count; i++) { + int t_count = 0, concat_count = 0, post_count = 0; + RegexToken* tokens = tokenize_regex(patterns[i], &t_count); + RegexToken* tokens_concat = insert_concat(tokens, t_count, &concat_count); + RegexToken* tokens_postfix = infix_to_postfix(tokens_concat, concat_count, &post_count); + + fragments[i] = build_nfa(tokens_postfix, post_count); + fragments[i].accept->accept_rule_index = i; + + free(tokens); + free(tokens_concat); + free(tokens_postfix); + } + + // 2. Create global start state with epsilon transitions to each pattern NFA's start state + NFAState* global_start = create_nfa_state(); + global_start->is_epsilon = true; + + NFAState* current_hub = global_start; + for (int i = 0; i < pattern_count; i++) { + if (i == pattern_count - 1) { + current_hub->edge1 = fragments[i].start; + } else { + NFAState* next_hub = create_nfa_state(); + next_hub->is_epsilon = true; + current_hub->edge1 = fragments[i].start; + current_hub->edge2 = next_hub; + current_hub = next_hub; + } + } + free(fragments); + + // 3. Subset construction + int total_nfa_states = g_nfa_state_count; + NFAState** all_nfa_states = g_nfa_states; + + int dfa_capacity = 1024; + int dfa_count = 0; + DFAState* dfa_states = (DFAState*)malloc(dfa_capacity * sizeof(DFAState)); + + // Queue for subset construction + int* work_queue = (int*)malloc(dfa_capacity * sizeof(int)); + int queue_head = 0, queue_tail = 0; + + // Start state epsilon closure + int start_nfa_id = global_start->id; + int* start_closure = NULL; + int start_closure_count = 0; + get_epsilon_closure(&start_nfa_id, 1, all_nfa_states, total_nfa_states, &start_closure, &start_closure_count); + + // Create start DFA state (0) + dfa_states[dfa_count].id = dfa_count; + dfa_states[dfa_count].nfa_states = start_closure; + dfa_states[dfa_count].nfa_state_count = start_closure_count; + memset(dfa_states[dfa_count].transitions, -1, sizeof(dfa_states[dfa_count].transitions)); + dfa_states[dfa_count].accept_rule_index = -1; + + work_queue[queue_tail++] = dfa_count; + dfa_count++; + + // Process queue + while (queue_head < queue_tail) { + int curr_dfa_id = work_queue[queue_head++]; + + // For each possible ASCII character transition + for (int c = 0; c < 256; c++) { + // Find NFA states reachable on character 'c' + int* reachable = (int*)malloc(total_nfa_states * sizeof(int)); + int reachable_count = 0; + + DFAState* curr_dfa = &dfa_states[curr_dfa_id]; + for (int i = 0; i < curr_dfa->nfa_state_count; i++) { + NFAState* nfa_s = all_nfa_states[curr_dfa->nfa_states[i]]; + if (!nfa_s->is_epsilon && nfa_s->char_set[c]) { + if (nfa_s->edge1) { + reachable[reachable_count++] = nfa_s->edge1->id; + } + } + } + + if (reachable_count > 0) { + // Compute epsilon closure of reachable NFA states + int* closure = NULL; + int closure_count = 0; + get_epsilon_closure(reachable, reachable_count, all_nfa_states, total_nfa_states, &closure, &closure_count); + free(reachable); + + // Check if this DFA state already exists + int existing_id = -1; + for (int d = 0; d < dfa_count; d++) { + if (are_nfa_sets_equal(dfa_states[d].nfa_states, dfa_states[d].nfa_state_count, closure, closure_count)) { + existing_id = d; + break; + } + } + + if (existing_id != -1) { + dfa_states[curr_dfa_id].transitions[c] = existing_id; + free(closure); + } else { + if (dfa_count >= dfa_capacity) { + dfa_capacity *= 2; + dfa_states = (DFAState*)realloc(dfa_states, dfa_capacity * sizeof(DFAState)); + work_queue = (int*)realloc(work_queue, dfa_capacity * sizeof(int)); + } + + dfa_states[dfa_count].id = dfa_count; + dfa_states[dfa_count].nfa_states = closure; + dfa_states[dfa_count].nfa_state_count = closure_count; + memset(dfa_states[dfa_count].transitions, -1, sizeof(dfa_states[dfa_count].transitions)); + dfa_states[dfa_count].accept_rule_index = -1; + + dfa_states[curr_dfa_id].transitions[c] = dfa_count; + work_queue[queue_tail++] = dfa_count; + dfa_count++; + } + } else { + free(reachable); + dfa_states[curr_dfa_id].transitions[c] = -1; + } + } + } + + // Determine accepting status of each DFA state based on NFA accept states + for (int d = 0; d < dfa_count; d++) { + int best_rule = -1; + for (int i = 0; i < dfa_states[d].nfa_state_count; i++) { + NFAState* nfa_s = all_nfa_states[dfa_states[d].nfa_states[i]]; + if (nfa_s->accept_rule_index != -1) { + if (best_rule == -1 || nfa_s->accept_rule_index < best_rule) { + best_rule = nfa_s->accept_rule_index; + } + } + } + dfa_states[d].accept_rule_index = best_rule; + } + + free(work_queue); + free_all_nfa_states(); // We no longer need the NFA states + + *dfa_state_count_out = dfa_count; + return dfa_states; +} + +void slex_free_dfa(DFAState* dfa_states, int dfa_state_count) { + if (dfa_states) { + for (int i = 0; i < dfa_state_count; i++) { + free(dfa_states[i].nfa_states); + } + free(dfa_states); + } +} diff --git a/bin/slex.exe b/bin/slex.exe deleted file mode 100644 index 9d989275bee6927bda87e6d7775928e161aa2871..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 50970 zcmeHw34C1Db@v@Dwv3k%u??nyFn~;Cf@OJ^0h1YNMt%b$X{?dFu{@1Oqa|n=X0c@; zL`F7YIuTK5(vp_+Q-kxBwEPk`?Uy)&rm_iOLei*YsT;@_g>0jk_ER?M5E6a=bC>t# z&C_OmB)>1M-%n5X-gD1A=iGD7z4yF3Z>ifi#0nW>MK~uX89RZL4mbZl^S^#nFT3)o zW$Ygoef6pnj=)#1YU=1rxMK0%_ISA46$$tB^d?>Vqpo6@-~EA-My{Rils|0 zDwCvxKE_%fEET0Y89N3+CfK!XaX~=^`y+tgfwKVetUxNz+z!H~Jm4fZ9mQ0jqY||X z7<;nCfqvm1b8)b2i@f9|6ROO~GS<)7^~D60N070AB+VhijCHBHDN>i4vGOV49ftvj zG}9dw$!K2^mF~;%j|8H=qaV44i?No9)^IWm;0RSz;w0HzICBm+sw?6Gan(hr0DvvQ zc{R?Q!;PAXn84$GL|?f=D{_#K<#>Gp?+NNd3r^y@0cXzPCVKl52?F&KUW{_~qo3sK z6!f^R0pNYzWW{w8y<``sM=D`MIEn8KR4@-W?{BoLHzI8_j5@-*31`mXW~_Rhh%X@(Azp`WMCztzxWAl|8#XyFAUFrsmzdLfXJmWh>UnJX^Gc+xW4>+WXk>~iOoOxyOH^T?-B|M@JImC1iyKQf^?|88UI$G-Ki z2QsUFvym~+z)fWi=aCPh-j}(&%y;x<=aK(~7S9*=>WtC6;XC@e^T;EdN%7m!Ld!7i z=ty#jZ~e$qG#=K!Fgcoi+ohP9GsU-}O=f8`{s+=PTv>x$_Oqa!Sz3VvxsEnFtzjz6RU;l#VL=j2(l9oQ14S|L>xb)a>CnvKDkxxH& zbi{e|J~YctEaX*(mO@c03uDyJl}}?%1$q?dTDl-x2ZGrX7gJU7-Kct=>j#BoZsA~V z;VBIGw!>9*`%*PpBzTkN7(HA?t88kM^K-#fZHM1j4CQ|Z;!?IC^^SoPWcg(WR(dlJ zog=Yd_Z@w~?fl4dynkRd4ua{;FW{l`E633ka^-jP7 zgT=cci#E7&4^NivMUr7KSOCP{1Tk->L>r_cV6JQ?cv-Pd`hZ)p+#Z@4qLI*}ec6+!4;YoLBZkT;|U-{pUG<`W%_EpZdA&m&seTf#)3l zOyEDyWseeQu+O2Tf0f3M&74ir<*f4&5*v%iG=rk`ZLh2r>EU|YkV{Da;4S?AsT7I5lgKQ&m=-j;oxRL-KIZeEqW9ERsHDHyu$cxH&#%gR97=i+sMt7Eo$DR1%HNzZ&%xvp&}Xa1WU zMQ?wM;7c0}Vt#X%Xz7(_s0f>WUv)YMpU;ZW(8`SnnzT$o`sBd5$>g@9FC<;*(*xrU zZSe9_+M3haVDi*CnrbJPYXfIXv~536{SvC^ffk79ZFi1;9ae!p*N@;|4t?*S+mSvQ zNRQkyaMLN4nmGFjYkj8pJy`YO!VAPa_I&X!l%X^>9tQED^&_TkNJj&26ebsA8eI5{ zn7_tvMPQi2PxYs`W-X1%g5-kdc?dG_MnQ7@!gC%BQab=Sa~5nCd3SOu2^FF!nRAv()4InX~lJD*U^k-k^oum3?ct&a zFs-!&(t)xLE!|YMhi19*thbbmu=1;DPgB)DQZbV(D;J%;mWt_b`_mItIfha;fqz47 zLtEEx=r47~bJ zskaRje{DCsAI1q{iKqE#;9P;034lVTsjSL#!i|P}arQdBBU<4Vp8&d+zO}@k{x>Zh zDxF;UVX(kZ|Lo+-eZ*{V>CekCJ)X)Y`S5gO8XdoiOoefWrkUMXX_h_;lt9M2YE`x# zSkNH>S=FV)s6r_&6o?Nb_UxU~pFu6mG=htzSK;t@` z=8QIQZsGkO#{b364|aocX8F;VoJad8S5ribEeD25Fxs6*zl&7`BXc+(K6-y+=4ccNw24-5DqF?b2{KSt)+eEwC$`P^V|pC zMRnV0!@>E%E0GVRe;OE!p38RsW^&SV0suf>1u(m0ncxq`iAZL#H{5xH^OhXnpsbfKmFTlqocp#$890-vP?z{E{Q3aFxMtLN7d$tHbPt@wgg8;YV+%_*>x9bW2ez zdc=DUX2S7l3-gl0$vmXN?z_t+44G%8?Sqc3Q}#_3CQFky=vDuwCy!DJK(wRLowsn{-@YuIOyG;L2}@V;^gGyy9dr) zbZ{dk^xH{Vb8TbWi-D8e>wNI*Xt^D3Jd(V5`xQu>M?M6|5H}nxs&ai_|B?tqzX5qL zlM{z|XV!3`);G*U8m}Xe^n8Z)sPq?4tK*Bxyl32GD-TNe7>dB|5 zYl2xjfI$6gOnty3gJ+u*y?Q=@H$xY^f@qH4i<0#B?HIfL&X0^ZKRYtM8DK$`i*zxk zI$jT`=`W(5v-s&E;yPY2MLiAo?7yHs+XYl0E#e%#jL}6c7Ke*71z7yjCl7xQ6|Td- zBI_jICG2vYu*-JCE|UpK6wUAMmX$&}|`X{BD$a^SOblrx9!HqtMyt2bD&_!yP2CLu#lPzutgx<830?D!^3WBU8EWP>w;mGYIx8ztDn?X09khK|A_# zvQo>eJ;OoUKZ#^0vv~5GTKc)kf7P&{UWAo=ftLR1Bo>U!ZKc3ZeHVmEs4b7W^*gl5 z)rZl}=nveXDpUN=4Oqw0FQB;-h*i&+{zfBT{E%ole1UIL6#Hm`_rdM^2-nx zZo62^cuG_MUWc;FpLUmK-?xCvc&45N+ouE**L~^YzXJH|%X#}pq(59MIFI}-v~zF` zt~9o1mc9>|v=Q@9fIVN_jS|fjYO1Q@aoQV&ssG3nHxuSa@(0->_|n<$v<+oeRukx@ z)PaHRTd85{GzQtV$dTBWW4=9!t2C$&TFrh9lcUftl=DNV;~j+eeDTlgzz)ar#Xps~ z$(48E$ACP#)NJI`w_9F~(E+YuVEn zd6+Xw4laXA2d<& zOMcb)Ip2!)uWN(17q7C$yPgvm*zoW-=z>58Ls;#vfW1IQO52``zjF3rQ+`PDEBxdm zHn5;3+Q2$>_y(?~Ldf5C_#Wc>OX#6`K^r2dC=XZHKAnNEt3(s2uA~dMvLt(ZU5V~+R~K^pMwVy{mFP~i)m7eH?^RMX_uSRfdrh|khtJ~MOT;=g-@<6z2I|{g{E&0~uIf%XJ-x2Z?r?i_ zZ6X>;cJ}r#mupvNJedl2?My}Ehj?Si-Ub`2ZguTHl#C`^;iM~UP~k-J&uABn_a;G0 zwLI9F>~KYJ6OXI1Z*?ExslLY&(Nt?Ml!yB;!o-_y1WNWsdb?b(VS)gwTPuXrGq>B{ znS>qhiRR*yv;f;Y@JB(FRq$G4KdFRz@;E64K6qj_a_URld4pU&(5XkPhDUU@99+@D7; zm{%UjE3+%D{U6So|620!chuC};40ry-@L}PeoMvrib|yznO>;Bpq~r=abbM?uI2|G zjN@PB8tc5J;Ca*0tVI>?O{}!io3-Hug#Vfvz+Q#}Cu_lNq)0EtS%ka`=_=g1Q$l&9 z+i|*(r~9z)!0AT51!)v#3-U3f_u*_sz8~pfoE^vyA^kMYF656ReI92Y@}o$9jI$s4 z38Wiv^VCDgyODkd=MeHENWY8oG33XPejn#B;UlFR&yFDve`Xn+Par>p^iOd91M*nc z*`t?YOPBIUe~WVzdA5qN{g*NJ3i2_erPz$hB2Q^OP8Mb?i1c=xCCIlR{WUiDmLQMV zfqlq@TY-@uM!N7S#x6&`6zTuOc|G#Sk$SFXY!mW9q}A8pHe}>mkVbHNk&hvL45xh z*jJ`JQe1}iQRyftW+l}{m4z(@F#~r$$u4E3l*-gMb>_EG z@ccBtKl$}kR&pT!yE@$cCgmrOU|u8evClAGlDQE(cN<`ywf04t&-HCQR z+Si%nrHGm9?WX7LIJC{k5i@kG8{qK%-Z(W15Me>S1N&2iMSHQ~E)Gk^yP`cZ*AwX`Z5}Ym^`kh4 z^HyJDy)UqFLq%&Br@8`de%EJr;y@&xR2(7ah2w$-{wryq64x-)pW)ze%W!PCe|Tv4 z(c$6Y%b7dyk{!vl1N=R{{Paz^$@3ki&Vn@ zSIXaAH%ucY`APwQ!dc)?gLVP@2cm&8$XSdlGZL4sVVK1MsJ4b``WL@V;L)YsLTvZC zYBp??zlgZjb|+Ik(e3Tgo@l%?f?_xx>Db;^wPh`Swp`opTHCO}JlpoCI{D9WYr8u4 zw?}~9*3-K-kqk%fT1!6|BHlwBcSYfjOA0yb+rf~|0>^?#M>x(3N)~qY;>SGxFV#aa z27b^)T!NZnvxWl-7A86n;JB=4Ma8_bNMMWn!aNkg!wPhi7-bKq5MT>UjJk$hdssbuQ01g`2n8?ZC0AVU~@>L-obF}5)(}H18`x~ zvP(^{x1qUOfmfMeh20Qjmzhw74Co)3(4fbwhqQ)9_+@i_O_RT&p1s{#+vsa*ZmbWn z%S~`?Lvy2kOEWwt;IH#Hu`A3fPXKqs=uQ3|T9Y1VXb6VbJIqQrM$>LjBRbOPtM$Pr zNp01>dS9)-1_PK~X`%*dg3@7KCa}>5Y3mxi{@T6lDicha^w-yTeKqWA6Y8&T^6l_7 z>fTzk@Nw&e*fnN-L$FEr>}Y0XCdjj^!S5xb@S*APdK=klvo?g=c;Hga0rIPj8@Ll* zJ56;Jd#70=$ik|6Ll7g4hJ6ipmU0tE+AP%62%W%0Ys@OHtVaFT`i9;0@YNu7B~%UV zG=!?zbtWDLI@i9BbhHce2&Y98u&!J&#NK7%liETat(IMHh1CR_ylkx%R^7xZOo-@^ zbt0>-^Rsm`qcrWSsc*t-Z;SysC7nQstueBYU@5 zNdv7BLSxY)zYFP24f-yOMfVISwT%sR9LX~!l2}JWn2kcL+Qg6}p}K~aQKBHnIIA&n z#Oi4oY&q*V^O~4scgTs(>bV%cDKXT_j)25)Up^rOLt1lfHQQl=h4*c$-ix(^B=+xt zGBvZF`0nsDX;9h*TGd0$Z>(jC9)q8ibkhR`Mh8u@9nOL4(dm!j^BdmD_MC%~iWy8#g z+ICwip*Lg1?XlF5Bx=;|wbcl%!$I|+ud$Bpvmij7xpCrcmP(?_R{$EUeqRmT?{*6U zwa!Dh!-mkbJ9pi<(^g;S3DLx2E-ku=B4dr+{w7U_CGmDMW6*d4yFGhDh=LKK>j+E{ za#n3+VG}`2^|X4y#%#Y?5z_p%P3u|2gzWRt+P0?&zMutsgP}XLGbD|7=ccZ__$J)(GuC}`M`Yyjm$28bs*3{!-r@21p@i%(lovhQW zQK36spNJs*J@!fw^kF1WK*mG+yK<0ZM|SeHh;`X(G(VamJnc5CWUoobtj7ckM-4Q{ zh&9A|%{m%ddup^D)huQOqT7}1ZWBhG6>}%8n=EeD7;a1RDnc@f{n6cp*TbfLd~3o9rp zjP!QLx}tq96INot#C||QsR2-11ug?(*-I!@l!>>ABS{0en0_MCV@Yg?Tvxgo8wN!~ zcu~odIF~Sp&DY<6A0ts`>;4K*z`W!;5>`nt-sA_!EKSqK16x4NyO(?e{}&au+%0Yp zTQGuRj|2uK5a<_<{}LK-T+B)@z1Z2) zyNI(}Ot2dkgW_W1u=o<9>se(+v72Rm`eF? zB=sz|o!&i|=^j=W5}vf7A~KuboEK}BCrL#GZ=U0?%w$aNThYPV_jj$OU)B=Ho0b2K zj5~*}R zMK3}SS1yEZ)T0Lpawz{1M5G_lOrUPYH4?P)#8wi*y>bS-e) zGtfaR+}ug#>qS#($joC(k&)Tu_d`Vs=sqiz>EqQLf1 z(Kyss5C{NO0>`K_GU`hN1mL66QD)SsOojZa0hJ*(=(OU$dPbd~jHxdqP#%H+N5g;I z4wH8z&>@%6cYD2>X?3)dGwq>v3Z>R!#@!!nN2nDclJ|p742)*7`=0=(e+C?-LqUgR z#wHHeCUFqQ`RK3`O@t0rYco|kB+UsO(mZHAlnzOALWgShwBnHt-E+`kCz=TzCa~jX zU8_{zX$%VgvGSnOTh(`>MKZU{L1wx+TF6Ydep_WG2+8F1V`bndmX;VEJ#cnF*9Yaxn13JgKO=s4Rz#*T`lw{Y>xkq{G?tGdu_Vd=zwM($6!{&*&WVBXE8{`sts8en^M2>1SjP`uRIU zKe(Q?4>9&V=x1UE{ls!EUiq2=dWaa&$_?%_&Bn!`+7CSZ!RyEq%;ktE>3A6ojc@A3 zOmwaW9b=|tYP<`XiSqvet%W$v!|u;k?u4ng#Q?g=M(1Y!(zqwhp#wmF<)brAzhF$U$=h}^&t>z92V zLlu{q5QGJ%U%Q_Lu8{d4Zeopu4_yGy)~^=eh<+iHcPKv`23+`|z3<8op8$^V!|B%@ z_Z;-|UCH~A@$`>Is#}SZwK-qv0uL$T2 z&A`j9=f}~CJit88$H!u526mXEXm+q3%EyiYEPRaiz^#Y!v3~#k9$z zIZT05>4zvvoSKQ5aO9WT2w%|Akjv{k=jGjJXjR;>Pfd(N@bY>LTTF`+*hiN*rpyKj zo{=(pvKE>1aNu@j`}XZ4kDybmdUhNInGT2Qd3-nrpI>hHd|wZKYR5n|yeoBm5E1+)AD>o{87}BmyQ(^5?1MF~a#UaE>A6_gP1c7OUb!grupAZLWk&f4ab9@FIyLDo4^mmEI z^1m8!bFt1a+HeV{TFvE%8-|!d=G#nx6)!pBMo|_cj<{M6wc`H(U~~_fFT;PRFYlmP z%RL8AxEE50nLp=YebW)xH4vn9>CD#I2At#IZ(3&uAi)`0Da{MY)QbSxlU~f@d^B%x zpDLD6d#C8B(5n5~N@=wOI6|v5*HM402Whos4q82GXtgidmz+9QZ{hl(zFT=v>3sS= z0({|hB)R<{nfvD;^G6MtvCpes1CiqicS3nW=ILW~yyS*4=k(s9>~8jbJ)~P{5&C@k zP9sJ1on`>*q2lKy=$Ej2sTBbw_4Q$LJ^aBId+;Bx*3b{L}0G_QV|Is`iAi2FxC#sD-SF9K4E?ev-E)huwbmR7$8Wz8fYbIQu}{Y8ilUgdTi`R(W`!-AE- zvCjfL(wl z>us_1m8Q@r(%YhwLguMQw?ttx;n@35W5IeTySo9NE!vr>@5=68^h?-%3LQ$jQ?w&S z?)1?qMKZIE+%tv_bE76aAD&27sJ_SMVE6OuyNYJ4dL+BM<{#ey&s&R+nB-oH2#&ZmHLd={K9 z0*Bt$I1`m(JZ3%AYLx_z@cAwv zyxod~68VpN4*s(Y?kD2Iw(wn18yWT+5*7ztr;52(0ytFsTZH+E4nF!E_FXKhyHw&J zC;}aNkplJ5Jd~a*@nadf2AmCMrOdrmdM0w$NO>q`D0I|F4%TF&#G&8e3ms$&`&g}( zI0%Y>v(rjImg*$VGLEC+zm<;qMgD5T7WTWulzw7{e(bE~ryrM{5Fljta-_oUPDBG! z+nuZ}?Ecv)>5vXdb3%tM4IZ!%=Yqd?-lX}bBFG-LaqAK@EQ z&*Aodxrx2yQ2rAHlsq*5H$*~Kam!5oQfqe~`Yz@%@(}Bx=CLv0%(iw9&p|)+rhfF8 zuJgCdnGq1)6bJnr=VNDT{g8BbliNWUn8en1(udNaphGw`I;{Szbl8HLw?c{A4 zV>14EZ4NpdnS%~ntU45%+c{1)2>p!AqQgH2&IA-Vn+^q?nROU@i*)$V9CS!=)NFn@ zHU}NXtvcj`6?bzQu6P3a8J$Ik&jDv_4muQcX4YZgH(vxgs?~J&F_r^oVg|}YD3F6Q6&*E82wr3z ziq6oboI}kLL$XHvKA4ANTS07!U}~lvGORNNj+`Y*4eLz5_vM;{bv|mb&R^IVzMmjq zFJP`{pkC2E3DGbqlSK_pGC_J{qT6kDiBIMqMn0(_Q1VG zL>;D$wodsPRbvMDJLso$4*C%|)c5J{uRH!0>E|&+KQ?n3_nmH_YxcTRc0&6eP~|X&zW>2_bMCrQX6-d z*-CPMbY9%wH0en0ZX5SiHtyG%tt9v1d2y$03X7FDYc}pK8~05%?oZ5%dy$QM(8j&o z#{DK6_v7>8jt_<8^?i?xd!>zgt&RJMd2wH2a$j z*tqxGxL4V@Pt1$^u#Nk$jeDPs`*k+%=jO%zF&pJ_y z|A38qpN)Ia#(l)b-8C=n581fKY}_>)_a|)J%jd=Yh>d%Pjl0{%eaPf4=lRp{!`aq2 zla5;Ba(Kx#u2S^^Qsf_R^%_U?XItak3P;Von{#BAegD=BeOGJ2@3CK-hLp_HuV0%_ zC@W8ov-L^k8&mCm%fA1_=sVtVqn^;mn-$bn^2rkHrgEFQrjAK|k9LPz*f$M%C`Q5} z&Sz?>F^}&9=NR-uwS`kW0ZN!Z1CAS`04f5`)VuKx>@+`N>^7sp6ukIv>jsYf_a;0w z7SZvHn0guoEO9JDi(%PHK-Qw9_CN$=8z9Pp0@4f!&0|y&kaj@mFd+v3Ie`;ql{iOH zDggqG6w&GtY9+_1fP4xN6)_6PmjR*cZj;VyfRtP4{0fj3NmaDEh;G5aLAnx<%K(WP zbfAqcdWKFtZzXW5fTL!90r3H1)z4l)Xx~rBqs#BUweUE>C3zhHP7CZlIjOim3<%xM zVM6|zAUG&~7QFru5VwWSX^!K{Y4s`~-vVOoMJax$S941NJsa4iw=$-lm=nCN z2hO9CGjMS8F1^Ksv}I~+Cvf_Wo}!Ai1EQ`S1f3)x>X{<}c@PkCW}zQy^%sCl$XP<* zJPC+Z=BEJ}v+()`(ZM0)QD3?fSN)`C^~F1JC9X98U1|jdncOD;Ic}kR2?p6H(1hmI z*Yw102GP;!rqVjR-5)q4uaJ2iTB%r6KyIYLio>*^ACM7CPg?=8`p^5RmBkbO2Oug= z5R^Xy$f!k!&k>G=*BBrg(9O|w77!P3h=!m;HyM>$=v)R!%%G#bGB{-Mx(&clF~6YW z0b~{Y%+%^mK-4pj0w)50c)qu31XnNg7KumcE#{6F{u9&EtT$E%y2nAT1WitAMCT zSoHljfZPYlg1h=+=7>dyC19-?ILNRo0Xc0*m`KL?H#J8st=0p_stq?FBNqQ@0i?sC z!)`z-Eiyk0$dIMg#{p40W}+9L1|(>q^E4n67CQ7&Z&IK+F8%`$(!6PlWq3lO%F>I= z0a4dqg4Y&6DlNP`fb?4+K|sJfhgTGkQ46mWAm-XcS{(r7oPonvD|mu>R+`o?EKG8A z6TXm#-n%_yv?BOt05RhY?or`K|q2Q$nAifvC!!TgbvaptRvqh(Snjm=O}PGtoi{&?Es6u ze+-Z@i^iS=YflCm?Rrm=dlA#MHC; z3h+Y~nd!^#Lqt!WxQ`eRDw*>10a68!Il7MmLWha-F+j#FI{XYE{YKw0v+?f+pSEy6 z37q2w4pAHhq|bu$3Lr5{-yL`&fkZUrq30pg4g<-KcjWf6@~{B`F?N{fAQhT_1f`1B=>Al@9*&25+VP=2S?ug->s9&i{{8Xj z0afbi?1`#;Tb%aXWfQ1PWeErG_lsk)5I0?S_8h>+v0H=S(v9zI$2I=d>o`k<+oHPo z)U&ShuTvxT4AF}LQ!)8GUs**w+J;XKROmWh+0%J|2=jU!Pn6#^D-w{CQPvSo=n?+C zL-CMc4$;;_Jv{uU8?AK;UH0RnkgPl06K;>T;v?904x&rFU9Hi+NX&S+aymSs+m%eT z;wzJSTW>r9eetJD@sO5!PoK?!K0w5pkW)Xoab!t z)?W3JBwLl(0I-v%Pd{z1ihY4Pr~+ig{~f9(^Vw!+0qzT zwDwBP>#g)sVhe=7rdUa24zt|Pc#!vs@7lLgQ#n?MR8EnQY-Hp`B!+bt`XXDzqcMCM zRmUqeKmcECy^CHUK%c$W55_x_VfvzJcepPkU=TJIrMD{U@l;Q;vpWi1Va$h#`9Tb5 zVvEfN$vp8fwHRNa*82`uQ}_(MvHp5)a$>3B=J5S9ppTU!kR)`nbfHvD+ByBBk-BC49`O@WXxDeAju5{Lhp6O3MqFT4hQ z(qhb7tUgpv>w{9^j%ZTi)%M0ct*v-WmHbiY*Q~7Rg|+i2`TC*}rUNJF%i--$)c5u% z$MN^nq~h^tPm&`TbfZ0~ZoRXcyB!|1iihKeu#R+}?@DK^Xj3njIcuLB#^jtB)!jPv zUp=E@cRffZpI;GZ(@nM11zlaF3RLy9!8AmTYN6luDDF-ctFR7t63cLoL(W(&NWi^k zA~AFIw7jvujegMw0N}RKFuf-RE5uYV!qhFdSu^V_sf!h!Ho_}pkRWfGH;erKc(W+p z)V^i&=8c;{e1@rmW&*N-*rL%f==Ni{;s0IHcu%wo-w+K$Il|q!tt<{ew>68n1@jO1 zQjqHZD$u;iQ#25;Z~Dzcj5rua^4zk!}?{;-ojSI-5PZW zHga!9@-!V2GSMC=*BYN9r-`{lIE8^22q%)5`}jHlDPjpeH%kgR;!LsZi~TD4+7&)Q zjgNOHlAVzR*Gr=Fo~Yj2Mq)(n;@*y!%$$Sq>Wvf@z0L$}c#%Ei-S)^@ThasYam;4C zY4|<8aXpdRpE!hNwp%}d*ZI(b!|gx{#P+c2wor>V+JzXk1`ARr)MZR)G^)gcY?==q9?P{m>A>P_ zMz3??Jhg;O=kxqZ2yxX^0jSc47&S&*oJZ1(wWTS8;+ye?QCj6DjWSX!PcwmN_`q}? zMT5|^o<4;NjZVw8(qojey4`@qc zMp2cWKM;v@VX84YFS{I(c!q4!_R`5sk2Ddcx;z#FPHj+{@DaU5`yg0eTQSWm^+$5l zZ|T0tTU{BDc;;2KR2@tn?lNa1Yfq@9y53bzloR9eSy%4J2y*fVD}(oRu{6IxvickO ziysn-D;k@J>$zr}36JQ=Ntv>0^96eB=BtfmG$|h;)!^Y!Adw4OmmK*ssz9AF2FME? zXgSK6wMZKAa|dD!U%;~$-!~7^7va7BM*ex^y$0hMrzDWIUdgESl~2&Q%c~fmNrNor za$~8*xXWF?NI2^flE=m{sfvPyKGbhKN|Ba8n22&2a9JXj$!Yqnh8;Dq&*C%|rod23 zHs?YHq>!G>Odq4-lQsoeh6m$n+t__j(&hvy(+rXvy2J<0&VN`jv>`TS6g_JItURuh zwtRA?a7=i;VeBGcb1mFX!IeQR|4KrSbljCl-lccbwOHOXz<1bmJvY9Cl7PqcfzbjL z@QuBxE-6S!(?(uIK>qs5wFyLf7dRJsTeRuoW;gh+5IIHq0WH@NjH;Q|E!h+0Z_`D7 ev#+XL)G01w3@OCslbTVn1;T&SQJ!QB;r|O2<4(!| diff --git a/env.sh b/env.sh new file mode 100644 index 0000000..06d3e7a --- /dev/null +++ b/env.sh @@ -0,0 +1,3 @@ +#/bin/sh + +export PATH="C:/usr/local/w64devkit/bin;$PATH" \ No newline at end of file