#include "../Headers/slex_core.h"

#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Size of the per-line scratch buffers, and the initial capacity of the
// buffers that accumulate %post_processor / %variables text.
enum { SLEX_LINE_BUF_SIZE = 4096 };

// Helper: Trim leading and trailing whitespace.
//
// Trailing whitespace is removed by writing a NUL into `str` itself; leading
// whitespace is skipped by returning a pointer further into the same buffer.
// A string that is empty or entirely whitespace is left unmodified and a
// pointer to its terminating NUL is returned.
static char *trim_whitespace(char *str)
{
    while (isspace((unsigned char)*str)) {
        str++;
    }
    if (*str == '\0') {
        return str;
    }

    char *end = str + strlen(str) - 1;
    while (end > str && isspace((unsigned char)*end)) {
        end--;
    }
    end[1] = '\0';
    return str;
}

// Helper: Get next line from buffer.
//
// Copies characters from *cursor into line_buf up to (not including) the next
// '\n', '\r' or NUL, NUL-terminates line_buf, then advances *cursor past one
// "\r", "\n" or "\r\n" terminator. A line longer than max_len - 1 characters
// is returned in pieces across successive calls.
//
// Returns false when *cursor is already at the end of the text, or when
// max_len is too small to hold at least one character plus the terminator
// (a 1-byte buffer could never make progress).
static bool get_next_line(char **cursor, char *line_buf, int max_len)
{
    if (max_len < 2) {
        return false;
    }

    char *c = *cursor;
    if (*c == '\0') {
        return false;
    }

    int idx = 0;
    while (*c != '\0' && *c != '\n' && *c != '\r' && idx < max_len - 1) {
        line_buf[idx++] = *c++;
    }
    line_buf[idx] = '\0';

    // Skip newline characters
    if (*c == '\r') {
        c++;
    }
    if (*c == '\n') {
        c++;
    }

    *cursor = c;
    return true;
}

// Helper: Split "<first> <rest>" in place.
//
// NUL-terminates the first whitespace-delimited token of `line` and returns
// the trimmed remainder. When the line holds a single token the returned
// pointer addresses an empty string.
static char *split_first_token(char *line)
{
    char *rest = line;
    while (*rest != '\0' && !isspace((unsigned char)*rest)) {
        rest++;
    }
    if (*rest != '\0') {
        *rest = '\0';
        rest = trim_whitespace(rest + 1);
    }
    return rest;
}

// Helper: Append a (tag, pattern) pair to out->rules.
//
// Both strings are duplicated with strdup(). Returns false on allocation
// failure, in which case `out` is left exactly as it was.
static bool append_rule(slex_rules *out, const char *tag, const char *pattern)
{
    char *tag_copy = strdup(tag);
    char *pattern_copy = strdup(pattern);
    slex_rule *grown = NULL;

    if (tag_copy && pattern_copy) {
        grown = (slex_rule *)realloc(out->rules,
                                     ((size_t)out->rule_count + 1) * sizeof(slex_rule));
    }
    if (!grown) {
        free(tag_copy);
        free(pattern_copy);
        return false;
    }

    // Only bump the count once the new slot is fully populated, so the
    // count always matches the number of valid entries.
    out->rules = grown;
    out->rules[out->rule_count].Tag = tag_copy;
    out->rules[out->rule_count].Pattern = pattern_copy;
    out->rule_count++;
    return true;
}

// Helper: Append an (id, tag) pair to out->mappings.
//
// Both strings are duplicated with strdup(). Returns false on allocation
// failure, in which case `out` is left exactly as it was.
static bool append_mapping(slex_rules *out, const char *id, const char *tag)
{
    char *id_copy = strdup(id);
    char *tag_copy = strdup(tag);
    slex_mapping *grown = NULL;

    if (id_copy && tag_copy) {
        grown = (slex_mapping *)realloc(out->mappings,
                                        ((size_t)out->mapping_count + 1) * sizeof(slex_mapping));
    }
    if (!grown) {
        free(id_copy);
        free(tag_copy);
        return false;
    }

    out->mappings = grown;
    out->mappings[out->mapping_count].Id = id_copy;
    out->mappings[out->mapping_count].Tag = tag_copy;
    out->mapping_count++;
    return true;
}

// Helper: Read raw lines until one whose trimmed text equals `terminator`
// (or until the input ends) and return them joined with '\n', each line
// followed by a newline.
//
// Lines keep their leading whitespace; trailing whitespace is stripped
// (whitespace-only lines are kept verbatim). The terminator line itself is
// consumed but not stored. Returns a malloc'd string the caller owns, or NULL
// on allocation failure.
static char *read_block_until(char **cursor, const char *terminator)
{
    size_t cap = SLEX_LINE_BUF_SIZE;
    size_t len = 0;
    char *text = (char *)malloc(cap);
    if (!text) {
        return NULL;
    }
    text[0] = '\0';

    char line[SLEX_LINE_BUF_SIZE];
    while (get_next_line(cursor, line, (int)sizeof(line))) {
        // trim_whitespace() cuts trailing blanks off `line` in place; the
        // indentation survives in `line` because only the returned pointer
        // skips over it.
        if (strcmp(trim_whitespace(line), terminator) == 0) {
            break;
        }

        size_t line_len = strlen(line);
        size_t needed = len + line_len + 2; // line + '\n' + NUL
        if (needed > cap) {
            size_t new_cap = cap;
            while (new_cap < needed) {
                new_cap *= 2;
            }
            char *grown = (char *)realloc(text, new_cap);
            if (!grown) {
                free(text);
                return NULL;
            }
            text = grown;
            cap = new_cap;
        }

        // Append at the known end instead of strcat(), which would rescan
        // the whole accumulated text for every line.
        memcpy(text + len, line, line_len);
        len += line_len;
        text[len++] = '\n';
        text[len] = '\0';
    }
    return text;
}

// Helper: Return the code block for `lang`, creating it (with NULL
// post_processor_code and variables) when none exists yet.
// Returns NULL on allocation failure, leaving `out` unchanged.
static code_block *find_or_add_code_block(slex_rules *out, slex_target_language lang)
{
    for (uint64_t i = 0; i < out->code_block_count; i++) {
        if (out->code_blocks[i].target_languge == lang) {
            return &out->code_blocks[i];
        }
    }

    code_block *grown = (code_block *)realloc(out->code_blocks,
                                              ((size_t)out->code_block_count + 1) * sizeof(code_block));
    if (!grown) {
        return NULL;
    }
    out->code_blocks = grown;

    code_block *added = &grown[out->code_block_count];
    added->target_languge = lang;
    added->post_processor_code = NULL;
    added->variables = NULL;
    out->code_block_count++;
    return added;
}

// Parse a rule description held in a NUL-terminated string.
//
// The text is processed line by line. Blank lines and lines starting with
// '#' or "//" are skipped. The lines "rule:", "mapping:" and "code:" select
// the current section:
//   rule:     each line is "<tag> <pattern>"  (tag = first token)
//   mapping:  each line is "<id> <tag>"       (id  = first token)
//   code:     "%c%", "%c#%" or "%csharp%" selects the target language (any
//             other "%...%" name deselects it); while a language is selected,
//             "%post_processor" ... "post_processor%" and
//             "%variables" ... "variables%" capture the enclosed lines.
//
// `content` is only read, never modified. `output_rule` is reset to empty
// first and then filled with heap-allocated data.
//
// Returns false when either argument is NULL or an allocation fails. On an
// allocation failure `output_rule` still holds everything parsed up to that
// point, with counts matching the arrays.
bool slex_read_rule_from_cstr(char *content, slex_rules *output_rule)
{
    if (!content || !output_rule) {
        return false;
    }

    // Initialize output structure
    output_rule->rules = NULL;
    output_rule->rule_count = 0;
    output_rule->mappings = NULL;
    output_rule->mapping_count = 0;
    output_rule->code_blocks = NULL;
    output_rule->code_block_count = 0;

    typedef enum {
        STATE_NONE,
        STATE_RULE,
        STATE_MAPPING,
        STATE_CODE
    } ParserState;

    ParserState state = STATE_NONE;
    slex_target_language current_lang = c_language;
    bool has_lang = false;

    char *cursor = content;
    char line[SLEX_LINE_BUF_SIZE];

    while (get_next_line(&cursor, line, (int)sizeof(line))) {
        char *trimmed = trim_whitespace(line);

        // Skip comments and empty lines
        if (trimmed[0] == '\0' || trimmed[0] == '#' ||
            (trimmed[0] == '/' && trimmed[1] == '/')) {
            continue;
        }

        // Section switches
        if (strcmp(trimmed, "rule:") == 0) {
            state = STATE_RULE;
            continue;
        }
        if (strcmp(trimmed, "mapping:") == 0) {
            state = STATE_MAPPING;
            continue;
        }
        if (strcmp(trimmed, "code:") == 0) {
            state = STATE_CODE;
            continue;
        }

        switch (state) {
        case STATE_RULE: {
            // Split into <tag> and <pattern>; tag is the first
            // space-delimited token. Lines without a pattern are ignored.
            char *pattern = split_first_token(trimmed);
            if (trimmed[0] != '\0' && pattern[0] != '\0' &&
                !append_rule(output_rule, trimmed, pattern)) {
                return false;
            }
            break;
        }

        case STATE_MAPPING: {
            // Split into <id> and <tag>. Lines without a tag are ignored.
            char *tag = split_first_token(trimmed);
            if (trimmed[0] != '\0' && tag[0] != '\0' &&
                !append_mapping(output_rule, trimmed, tag)) {
                return false;
            }
            break;
        }

        case STATE_CODE: {
            size_t len = strlen(trimmed);

            // len >= 2 keeps a lone "%" from being read as an (impossible)
            // "%...%" marker with a negative-length name.
            if (len >= 2 && trimmed[0] == '%' && trimmed[len - 1] == '%') {
                // Language definition block like %c% or %c#% or %csharp%.
                // The name is isolated inside the line buffer itself, so its
                // length is bounded by the line and needs no second buffer.
                trimmed[len - 1] = '\0';
                const char *lang_name = trim_whitespace(trimmed + 1);

                if (strcmp(lang_name, "c") == 0) {
                    current_lang = c_language;
                    has_lang = true;
                } else if (strcmp(lang_name, "c#") == 0 ||
                           strcmp(lang_name, "csharp") == 0) {
                    current_lang = csharp;
                    has_lang = true;
                } else {
                    has_lang = false;
                }
            } else if (has_lang && strcmp(trimmed, "%post_processor") == 0) {
                // Read all lines until "post_processor%"
                char *code = read_block_until(&cursor, "post_processor%");
                code_block *block =
                    code ? find_or_add_code_block(output_rule, current_lang) : NULL;
                if (!block) {
                    free(code);
                    return false;
                }
                // A later section for the same language replaces the earlier
                // one; release the old text so it is not leaked.
                free((void *)block->post_processor_code);
                block->post_processor_code = code;
            } else if (has_lang && strcmp(trimmed, "%variables") == 0) {
                // Read all lines until "variables%"
                char *vars = read_block_until(&cursor, "variables%");
                code_block *block =
                    vars ? find_or_add_code_block(output_rule, current_lang) : NULL;
                if (!block) {
                    free(vars);
                    return false;
                }
                free((void *)block->variables);
                block->variables = vars;
            }
            break;
        }

        case STATE_NONE:
        default:
            // Content before the first section header is ignored.
            break;
        }
    }

    return true;
}

// Read the whole of `f` into memory and parse it with
// slex_read_rule_from_cstr().
//
// The stream is sized with fseek()/ftell() and rewound to the start, so it
// must be seekable. Returns false when an argument is NULL, the stream cannot
// be sized or read, memory cannot be allocated, or parsing fails. The stream
// is not closed.
bool slex_read_rule_from_file(FILE *f, slex_rules *output_rule)
{
    if (!f || !output_rule) {
        return false;
    }

    // Determine file size
    if (fseek(f, 0, SEEK_END) != 0) {
        return false;
    }
    long size = ftell(f);
    if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        return false;
    }

    char *content = (char *)malloc((size_t)size + 1);
    if (!content) {
        return false;
    }

    // A short read is not an error by itself (text-mode streams may deliver
    // fewer bytes than ftell() reported); only a stream error is.
    size_t read_bytes = fread(content, 1, (size_t)size, f);
    if (read_bytes < (size_t)size && ferror(f)) {
        free(content);
        return false;
    }
    content[read_bytes] = '\0';

    bool success = slex_read_rule_from_cstr(content, output_rule);
    free(content);
    return success;
}