commit 7d974680a6a17e8c5e79d0de3abd1d02057ed50d Author: Creeper Lv Date: Tue May 26 03:56:30 2026 +1000 Init Commit. diff --git a/Headers/slex_core.h b/Headers/slex_core.h new file mode 100644 index 0000000..67c5c00 --- /dev/null +++ b/Headers/slex_core.h @@ -0,0 +1,65 @@ +/* slex core API: option and rule data types plus the rule-reading and translation entry points. */ +#ifndef SLEX_CORE_H_ +#define SLEX_CORE_H_ +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +/* Languages a lexer can be generated for. */ +typedef enum slex_target_language +{ + c_language, + csharp, +} slex_target_language; +/* Generation settings; the fields appear to correspond to the -l, -h, -ns, -class, -prefix and -data_type options listed in README.md. */ +typedef struct slex_options +{ + slex_target_language target_language; + char *header_output; + char *namespace_name; + char *class_name; + char *prefix; + char *data_type_name; +} slex_options; + +/* A tag together with its pattern text (presumably one entry of the rule file's `rule:` section). */ +typedef struct slex_rule +{ + char *Tag; + char *Pattern; +} slex_rule; +/* An id together with a tag (presumably one entry of the rule file's `mapping:` section). */ +typedef struct slex_mapping +{ + char *Id; + char *Tag; +} slex_mapping; + +/* Code attached to one target language; the field names match the %post_processor and %variables sections described in README.md. NOTE: `target_languge` is misspelled but kept so existing users of the field keep compiling. */ +typedef struct code_block +{ + slex_target_language target_languge; + char *post_processor_code; + char *variables; +} code_block; + +/* Aggregate of rule, mapping and code-block arrays; each array is followed by its count field (presumably the number of elements). */ +typedef struct slex_rules +{ + slex_rule *rules; + uint64_t rule_count; + slex_mapping *mappings; + uint64_t mapping_count; + code_block *code_blocks; + uint64_t code_block_count; +} slex_rules; + +/* Rule readers taking a stream or a C string and an output slex_rules. Not implemented in this commit; presumably they return true on success (TODO confirm). */ +bool slex_read_rule_from_file(FILE *f, slex_rules *output_rule); +bool slex_read_rule_from_cstr(char *content, slex_rules *output_rule); + +/* Translators that write to output_file. The _c and _csharp variants are currently stubs that return false; presumably slex_translate_to_file dispatches on options->target_language (TODO confirm). */ +bool slex_translate_to_file_c(slex_options *options, slex_rules *rules, FILE *output_file); +bool slex_translate_to_file_csharp(slex_options *options, slex_rules *rules, FILE *output_file); +bool slex_translate_to_file(slex_options *options, slex_rules *rules, FILE *output_file); + +#endif diff --git a/README.md b/README.md new file mode 100644 index 0000000..ad74ea4 --- /dev/null +++ b/README.md @@ -0,0 +1,181 @@ +# Simple Lexer + +Simple Lexer is a simple lexer generator that translates a lexer rule file into source code in a target programming language. 
+ +Currently, this project aims to support the following platforms/languages: + +|Language| Version| Platform| +|--------|--------|--------- | +|C | 99 | Win32, POSIX, ESP32 | +|C# | 9.0 | .netstandard2.1 (all possible target platforms, including Unity)| + +## Lexer Rule File Format + +``` +rule: + + + +... + +mapping: + + + +code: + +%% + +%post_processor +... +code for lang2 +... +post_processor% + +%variables +variables in a scope accessible to post_processor, e.g. for state management. +variables% + + +%% +%post_processor +... +code for lang2 +... +post_processor% + +%variables +variables in a scope accessible to post_processor, e.g. for state management. +variables% + +``` + +Code for each language is for post-processing purposes only. +Code inside post_processor will directly replace the contents of `slex_post_process(...)` for each language. + +## Usage + +``` +slex [options] <input file> [options] + +input file usually ends with `.slex` + +Options: + +-o output file/output folder +-l specify target language, e.g. c, c#, csharp +-h
output header file (separates the output into implementation and definitions when the language is c or c++. Note: c++ is currently not supported.) +-ns specify namespace (supported languages only). Default is `slex_generated`, `SLexGenerated`, `io.creeperlv.slex.generated` for the applicable language. +-class specify class name (supported languages only). Default is `slexer`, `SLexer`. +-prefix specify prefix for functions. Default is `slex_` for languages that do not support namespaces/classes, `` (empty string) for languages that support them. +-data_type specify the name of the segment data type. Different languages have different default values: + +``` +## Data Type Name Table + +|Language| Type Name| +| - | - | +| C | slex_segment | +| C# | Segment | + +### Generated Lexer + +All usages here use default settings. + +#### C99 + +Default options: + +``` + -prefix slex_ -data_type slex_segment +``` + +Usage sample: +``` +void slex_sample(FILE* f, char* file_name){ + + struct slex_segment* head; + const char* str=""; + if(slex_file(f, file_name, &head)){ + //Success + } + slex_free(head); + if(slex_cstr(str, file_name, &head)){ + //Success + } + slex_free(head); +} +``` +API and defined data types: +```c +typedef struct slex_segment{ + char* head; + int64_t length; + char* file_name; + int64_t line; + int64_t col; + enum slex_segment_tag; + enum slex_segment_id; + struct slex_segment* prev; + struct slex_segment* next; +} slex_segment; + +typedef enum slex_segment_tag{ + +}slex_segment_tag; +typedef enum slex_segment_id{ + default, +}slex_segment_id; +char slex_file(FILE* f, char* file_name, slex_segment** head); +char slex_cstr(char* input, char* file_name, slex_segment** head); +char slex_free(slex_segment* head); +``` + +`slex_post_process` definition: + +```c + +typedef enum slex_post_process_result{ + slex_continue, + slex_skip, + slex_continue_with_output, +}slex_post_process_result; +slex_post_process_result slex_post_process(slex_segment* input, slex_segment** output); +``` + + +#### 
C# +Default options: + +``` + -ns SLexGenerated -class SLexer -data_type Segment -prefix "" + ``` + +##### APIs: +``` +namespace SLexGenerated{ + public class SLexer{ + public bool SLex(FileInfo inputFile, out Segment Head); + public bool SLex(Stream inputStream, out Segment Head); + public bool SLex(string inputContent, out Segment Head); + private PostProcessResult slex_post_process(Segment Input,out Segment Output){ + //Default implementation: + Output=Input; + return PostProcessResult.Continue; + } + } + public enum PostProcessResult{ + Continue, + Skip, + ContinueWithOutput + } + public class Segment{ + public string Content; + public string FileName; + public Segment? Prev; + public Segment? Next; + public long Line; + public long Column; + } +} +``` diff --git a/Source/Languages/C/slex_template.c b/Source/Languages/C/slex_template.c new file mode 100644 index 0000000..a966708 --- /dev/null +++ b/Source/Languages/C/slex_template.c @@ -0,0 +1,8 @@ +#include "../../../Headers/slex_core.h" + +/* C-language translator declared in slex_core.h. Not implemented yet: it ignores its arguments and always returns false. */ +bool slex_translate_to_file_c(slex_options *options, slex_rules *rules, FILE *output_file) +{ + // TODO: Stub for moment. + return false; +} diff --git a/Source/Languages/CSharp/slex_template.c b/Source/Languages/CSharp/slex_template.c new file mode 100644 index 0000000..a4f3669 --- /dev/null +++ b/Source/Languages/CSharp/slex_template.c @@ -0,0 +1,8 @@ +#include "../../../Headers/slex_core.h" + +/* C#-language translator declared in slex_core.h. Not implemented yet: it ignores its arguments and always returns false. */ +bool slex_translate_to_file_csharp(slex_options *options, slex_rules *rules, FILE *output_file) +{ + // TODO: Stub for moment. 
+ return false; +} diff --git a/Source/cli/main.c b/Source/cli/main.c new file mode 100644 index 0000000..e29ad76 --- /dev/null +++ b/Source/cli/main.c @@ -0,0 +1,7 @@ +#include "../../Headers/slex_core.h" + +/* CLI entry point. Command-line handling is not implemented yet, so ac and av are ignored and the exit status is 0. */ +int main(int ac, char **av) +{ + return 0; +} diff --git a/Source/slex.c b/Source/slex.c new file mode 100644 index 0000000..7972179 --- /dev/null +++ b/Source/slex.c @@ -0,0 +1 @@ +#include "../Headers/slex_core.h" diff --git a/bin/slex.exe b/bin/slex.exe new file mode 100644 index 0000000..9d98927 Binary files /dev/null and b/bin/slex.exe differ diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..418bc60 --- /dev/null +++ b/build.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# Builds the slex CLI into ./bin/slex. An optional ./env.sh is read first, CC defaults to cc, and a non-empty SKIP_EXE skips the compile step. + +mkdir -p ./bin/ + +if [ -f "./env.sh" ]; +then + # `source` is a bash extension that strict /bin/sh implementations lack; `.` is the portable POSIX spelling. + . ./env.sh +fi + +if [ -z "$CC" ]; +then + CC=cc +fi + +if [ -z "$SKIP_EXE" ]; +then + COMMAND="$CC ./Source/*.c ./Source/cli/*.c ./Source/Languages/*/*.c -o ./bin/slex" + echo "$COMMAND" + $COMMAND +fi