182 lines
4.1 KiB
Markdown
182 lines
4.1 KiB
Markdown
|
|
# Simple Lexer
|
||
|
|
|
||
|
|
Simple Lexer is a simple lexer that translates a lexer rule file into source code for a target programming language.
|
||
|
|
|
||
|
|
Currently, this project aims to support the following languages/platforms:
|
||
|
|
|
||
|
|
|Language| Version| Platform|
|
||
|
|
|--------|--------|--------- |
|
||
|
|
|C | 99 | Win32, POSIX, ESP32 |
|
||
|
|
|C# | 9.0 | .netstandard2.1 (all possible target platforms, including Unity)|
|
||
|
|
|
||
|
|
## Lexer Rule File Format
|
||
|
|
|
||
|
|
```
|
||
|
|
rule:
|
||
|
|
|
||
|
|
<Tag> <MatchingPattern>
|
||
|
|
<Tag> <MatchingPattern>
|
||
|
|
...
|
||
|
|
|
||
|
|
mapping:
|
||
|
|
|
||
|
|
<Id> <Tag>
|
||
|
|
|
||
|
|
code:
|
||
|
|
|
||
|
|
%<lang1>%
|
||
|
|
|
||
|
|
%post_processor
|
||
|
|
...
|
||
|
|
code for lang1
|
||
|
|
...
|
||
|
|
post_processor%
|
||
|
|
|
||
|
|
%variables
|
||
|
|
variables accessible from within the post_processor scope, e.g. for state management.
|
||
|
|
variables%
|
||
|
|
|
||
|
|
|
||
|
|
%<lang2>%
|
||
|
|
%post_processor
|
||
|
|
...
|
||
|
|
code for lang2
|
||
|
|
...
|
||
|
|
post_processor%
|
||
|
|
|
||
|
|
%variables
|
||
|
|
variables accessible from within the post_processor scope, e.g. for state management.
|
||
|
|
variables%
|
||
|
|
|
||
|
|
```
|
||
|
|
|
||
|
|
The code for each language is for post-processing purposes only.
|
||
|
|
Code inside post_processor will directly replace the contents of `slex_post_process(...)` for each language.
|
||
|
|
|
||
|
|
## Usage
|
||
|
|
|
||
|
|
```
|
||
|
|
slex [options] <input_file> [options]
|
||
|
|
|
||
|
|
input file usually ends with `.slex`
|
||
|
|
|
||
|
|
Options:
|
||
|
|
|
||
|
|
-o <output> output file/output folder
|
||
|
|
-l <language> specify target language, e.g: c, c#, csharp
|
||
|
|
-h <header> output header file (will separate output implementation and definitions when language is c or c++. Note: c++ is currently not supported.)
|
||
|
|
-ns <namespace> specify namespace (supported languages only). Default is `slex_generated`, `SLexGenerated`, `io.creeperlv.slex.generated` for applicable language.
|
||
|
|
-class <class_name> specify class name (supported languages only). Default is `slexer`, `SLexer`.
|
||
|
|
-prefix <function_prefix> specify prefix for functions. Default is `slex_` for languages that do not support namespaces/classes, `` (empty string) for languages that support namespaces/classes.
|
||
|
|
-data_type <data_type_name> specify the name of the segment data type. Different languages have different default values:
|
||
|
|
|
||
|
|
```
|
||
|
|
## Data Type Name Table
|
||
|
|
|
||
|
|
|Language| Type Name|
|
||
|
|
| - | - |
|
||
|
|
| C | slex_segment |
|
||
|
|
| C# | Segment |
|
||
|
|
|
||
|
|
### Generated Lexer
|
||
|
|
|
||
|
|
All usages here use default settings.
|
||
|
|
|
||
|
|
#### C99
|
||
|
|
|
||
|
|
Default options:
|
||
|
|
|
||
|
|
```
|
||
|
|
-prefix slex_ -data_type slex_segment
|
||
|
|
```
|
||
|
|
|
||
|
|
Usage sample:
|
||
|
|
```
|
||
|
|
void slex_sample(FILE* f, char* file_name){
|
||
|
|
|
||
|
|
struct slex_segment* head;
|
||
|
|
const char* str="<some_inputs>";
|
||
|
|
if(slex_file(f, file_name, &head)){
|
||
|
|
//Success
|
||
|
|
}
|
||
|
|
slex_free(head);
|
||
|
|
if(slex_cstr(str, file_name, &head)){
|
||
|
|
//Success
|
||
|
|
}
|
||
|
|
slex_free(head);
|
||
|
|
}
|
||
|
|
```
|
||
|
|
API and defined data types:
|
||
|
|
```c
|
||
|
|
typedef struct slex_segment{
|
||
|
|
char* head;
|
||
|
|
int64_t length;
|
||
|
|
char* file_name;
|
||
|
|
int64_t line;
|
||
|
|
int64_t col;
|
||
|
|
enum slex_segment_tag;
|
||
|
|
enum slex_segment_id;
|
||
|
|
struct slex_segment* prev;
|
||
|
|
struct slex_segment* next;
|
||
|
|
} slex_segment;
|
||
|
|
|
||
|
|
typedef enum slex_segment_tag{
|
||
|
|
<generated from rule file>
|
||
|
|
}slex_segment_tag;
|
||
|
|
typedef enum slex_segment_id{
|
||
|
|
default, <generated from rule file>
|
||
|
|
}slex_segment_id;
|
||
|
|
char slex_file(FILE* f, char* file_name, slex_segment** head);
|
||
|
|
char slex_cstr(char* input, char* file_name, slex_segment** head);
|
||
|
|
char slex_free(slex_segment* head);
|
||
|
|
```
|
||
|
|
|
||
|
|
`slex_post_process` definition:
|
||
|
|
|
||
|
|
```c
|
||
|
|
|
||
|
|
typedef enum slex_post_process_result{
|
||
|
|
slex_continue,
|
||
|
|
slex_skip,
|
||
|
|
slex_continue_with_output,
|
||
|
|
}slex_post_process_result;
|
||
|
|
slex_post_process_result slex_post_process(slex_segment* input, slex_segment** output);
|
||
|
|
```
|
||
|
|
|
||
|
|
|
||
|
|
#### C#
|
||
|
|
Default options:
|
||
|
|
|
||
|
|
```
|
||
|
|
-ns SLexGenerated -class SLexer -data_type Segment -prefix ""
|
||
|
|
```
|
||
|
|
|
||
|
|
##### APIs:
|
||
|
|
```
|
||
|
|
namespace SLexGenerated{
|
||
|
|
public class SLexer{
|
||
|
|
public bool SLex(FileInfo inputFile, out Segment Head);
|
||
|
|
public bool SLex(Stream inputStream, out Segment Head);
|
||
|
|
public bool SLex(string inputContent, out Segment Head);
|
||
|
|
private PostProcessResult slex_post_process(Segment Input,out Segment Output){
|
||
|
|
//Default implementation:
|
||
|
|
Output=Input;
|
||
|
|
return PostProcessResult.Continue;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
public enum PostProcessResult{
|
||
|
|
Continue,
|
||
|
|
Skip,
|
||
|
|
ContinueWithOutput
|
||
|
|
}
|
||
|
|
public class Segment{
|
||
|
|
public string Content;
|
||
|
|
public string FileName;
|
||
|
|
public Segment? Prev;
|
||
|
|
public Segment? Next;
|
||
|
|
public long Line;
|
||
|
|
public long Column;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
```
|