Lime Parser Generator 0.1.0
Runtime-extensible LALR(1) parser with SIMD tokenization and LLVM JIT
Loading...
Searching...
No Matches
jit_tokenizer.h
1/*
2** JIT-compiled keyword tokenizer.
3**
4** Generates optimized machine code for keyword classification using LLVM
5** OrcJIT. Instead of hash table lookups, the JIT builds a trie/switch-based
6** classifier that maps input strings directly to token codes through a
7** series of character comparisons compiled to native branch sequences.
8**
9** When JIT is unavailable (LIME_NO_JIT defined), the classifier returns -1
10** for all inputs, signaling the caller to fall back to the hash-based
11** TokenTable lookup.
12*/
13#ifndef JIT_TOKENIZER_H
14#define JIT_TOKENIZER_H
15
16#include <stdint.h>
17#include <stddef.h>
18#include <stdbool.h>
19
20#ifdef __cplusplus
21extern "C" {
22#endif
23
24typedef struct TokenTable TokenTable;
25
26/* ------------------------------------------------------------------ */
27/* Statistics */
28/* ------------------------------------------------------------------ */
29
38
39/* ------------------------------------------------------------------ */
40/* Opaque handle */
41/* ------------------------------------------------------------------ */
42
43typedef struct JITTokenizer JITTokenizer;
44
45/* ------------------------------------------------------------------ */
46/* Public API */
47/* ------------------------------------------------------------------ */
48
49/*
50** Create a JIT-compiled tokenizer from a TokenTable.
51**
52** Reads all keywords from the table and compiles a trie-based classifier
53** that maps (input, length) pairs to token codes. The resulting
54** JITTokenizer is independent of the original table and does not hold
55** a reference to it.
56**
57** Returns NULL if:
58** - JIT is not available (LIME_NO_JIT)
59** - table is NULL or empty
60** - LLVM compilation fails
61*/
62JITTokenizer *jit_tokenizer_create(const TokenTable *table);
63
64/*
65** Destroy a JIT tokenizer and free all associated resources.
66** Passing NULL is safe and does nothing.
67*/
68void jit_tokenizer_destroy(JITTokenizer *tok);
69
70/*
71** Classify a keyword using the JIT-compiled trie.
72**
73** Returns the token code if the input matches a compiled keyword,
74** or -1 if no match is found (caller should fall back to hash lookup).
75**
76** The comparison is case-insensitive for ASCII letters (matching the
77** TokenTable's behavior for SQL keywords).
78**
79** Parameters:
80** tok - JIT tokenizer (must not be NULL)
81** input - Pointer to the keyword string (not necessarily NUL-terminated)
82** len - Length of the input string in bytes
83*/
84int jit_tokenizer_classify_keyword(const JITTokenizer *tok,
85 const char *input, size_t len);
86
87/*
88** Get compilation statistics (compile time, code size, keyword count) for the tokenizer.
89*/
90JITTokenizerStats jit_tokenizer_get_stats(const JITTokenizer *tok);
91
92/*
93** Check whether the JIT tokenizer is available at runtime.
94** Returns true if LLVM support was compiled in and initialization
95** succeeds, false otherwise.
96*/
97bool jit_tokenizer_is_available(void);
98
99#ifdef __cplusplus
100}
101#endif
102
103#endif /* JIT_TOKENIZER_H */
JITTokenizerStats: JIT compilation statistics for the tokenizer/keyword trie.
uint64_t compile_time_ns
Wall-clock nanoseconds to compile.
uint64_t code_size_bytes
Approximate generated code size in bytes.
uint32_t keywords_compiled
Number of keywords in the compiled trie.
TokenTable: Thread-safe token lookup table.
Definition token_table.h:40