Lime Parser Generator 0.1.0
Runtime-extensible LALR(1) parser with SIMD tokenization and LLVM JIT
Loading...
Searching...
No Matches
utf8.h
#ifndef LIME_UTF8_H
#define LIME_UTF8_H

/* UTF-8 decoding/encoding helpers and Unicode identifier predicates. */

#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Decode one UTF-8 codepoint starting at *p.
**
** p:   in/out read cursor. *p is advanced past the decoded character.
** end: limit of the input; decoding stops at end.
**      NOTE(review): presumably one past the last readable byte -
**      confirm against the implementation.
**
** Returns the decoded codepoint, or -1 on an invalid sequence.
** NOTE(review): where *p is left after a -1 return, and what is returned
** when *p == end, are not specified by this header - check the
** implementation before relying on either. */
int32_t utf8_decode(const char **p, const char *end);

/* Encode a codepoint as UTF-8.
**
** cp:  the codepoint to encode.
** out: destination buffer. Between 1 and 4 bytes are written and no size
**      is passed, so the caller must provide room for at least 4 bytes.
**      NOTE(review): nothing here says a terminating NUL is written; do
**      not assume one.
**
** Returns the number of bytes written, or 0 if cp is invalid. */
int utf8_encode(int32_t cp, char *out);

/* Report how many bytes a UTF-8 character occupies, judged only from its
** first (lead) byte.
**
** first_byte: the lead byte of the character.
**
** Returns the expected byte length, or 0 if first_byte is not a valid
** lead byte. */
int utf8_char_length(unsigned char first_byte);

/* Unicode character properties per UAX#31 (Unicode Identifier and Pattern
** Syntax).
**
** ID_Start:    Letters, Nl, Other_ID_Start,
**              minus Pattern_Syntax / Pattern_White_Space
** ID_Continue: ID_Start + Mn, Mc, Nd, Pc, Other_ID_Continue
**
** Both predicates take an already-decoded codepoint (not a UTF-8 byte)
** and return true when cp has the named property. */
bool utf8_is_id_start(int32_t cp);
bool utf8_is_id_continue(int32_t cp);

#ifdef __cplusplus
}
#endif

#endif /* LIME_UTF8_H */