28#define LELY_UTIL_LEX_INLINE extern inline
41lex_char(
int c,
const char *begin,
const char *end,
struct floc *at)
45 const char *cp = begin;
47 if ((end && cp >= end) || *cp++ != c)
50 return floc_lex(at, begin, begin + 1);
54lex_ctype(
int (*ctype)(
int),
const char *begin,
const char *end,
60 const char *cp = begin;
62 while ((!end || cp < end) && ctype((
unsigned char)*cp))
73 const char *cp = begin;
75 if ((end && cp >= end) || !
isbreak((
unsigned char)*cp))
79 if (*cp++ ==
'\r' && (!end || cp < end) && *cp ==
'\n')
86lex_utf8(
const char *begin,
const char *end,
struct floc *at,
char32_t *pc32)
90 const char *cp = begin;
98 unsigned char c = *cp++;
103 }
else if ((c & 0xc0) == 0x80) {
108 while ((!end || cp < end)
109 && ((
unsigned char)*cp & 0xc0) == 0x80)
112 "a UTF-8 sequence cannot begin with a continuation byte");
114 }
else if ((c & 0xe0) == 0xc0) {
118 }
else if ((c & 0xf0) == 0xe0) {
122 }
else if ((c & 0xf8) == 0xf0) {
134 if ((end && cp > end) || ((
unsigned char)*cp & 0xc0) != 0x80)
136 c32 = (c32 << 6) | ((
unsigned char)*cp & 0x3f);
142 if ((c32 >= 0xd800 && c32 <= 0xdfff) || c32 > 0x10ffff) {
144 "illegal Unicode code point U+%" PRIX32, c32);
167 const char *cp = begin;
169 if ((end && cp >= end) || !(*cp ==
'_' || isalpha((
unsigned char)*cp)))
173 while ((!end || cp < end)
174 && (*cp ==
'_' || isalnum((
unsigned char)*cp)))
179 memcpy(s, begin,
MIN((
size_t)(cp - begin), *pn));
191 const char *cp = begin;
193 if (end && cp >= end)
197 return lex_utf8(begin, end, at, pc32);
200 if (end && cp >= end)
207 while ((!end || cp < end) &&
isodigit((
unsigned char)*cp)) {
208 c32 = (c32 << 3) |
ctoo(*cp);
213 case '\'': c32 =
'\'';
break;
214 case '\"': c32 =
'\"';
break;
215 case '\\': c32 =
'\\';
break;
216 case 'a': c32 =
'\a';
break;
217 case 'b': c32 =
'\b';
break;
218 case 'f': c32 =
'\f';
break;
219 case 'n': c32 =
'\n';
break;
220 case 'r': c32 =
'\r';
break;
221 case 't': c32 =
'\t';
break;
222 case 'v': c32 =
'\v';
break;
224 while ((!end || cp < end)
225 && isxdigit((
unsigned char)*cp)) {
226 c32 = (c32 << 4) |
ctox(*cp);
233 isgraph((
unsigned char)*cp)
234 ?
"illegal escape sequence '\\%c'"
235 :
"illegal escape sequence '\\\\%o'",
262 const char *cp = begin;
265 char *ends = s + (s && pn ? *pn : 0);
267 while ((!end || cp < end) && *cp && *cp !=
'\"'
268 && !
isbreak((
unsigned char)*cp)) {
292 const char *cp = begin;
295 if ((!end || cp < end) && (*cp ==
'+' || *cp ==
'-'))
300 if ((!end || cp < end) && isdigit((
unsigned char)*cp)) {
302 }
else if ((!end || end - cp >= 2) && cp[0] ==
'.'
303 && isdigit((
unsigned char)cp[1])) {
309 while ((!end || cp < end) && *cp) {
310 if (*cp ==
'e' || *cp ==
'E' || *cp ==
'p' || *cp ==
'P') {
313 if ((!end || cp < end) && (*cp ==
'+' || *cp ==
'-'))
315 }
else if (*cp ==
'.' || *cp ==
'_'
316 || isalnum((
unsigned char)*cp)) {
326#define LELY_UTIL_DEFINE_LEX_SIGNED(type, suffix, strtov, min, max, pname) \
327 size_t lex_c99_##suffix(const char *begin, const char *end, \
328 struct floc *at, type *pname) \
330 size_t chars = lex_c99_pp_num(begin, end, NULL); \
334 char *buf = strndup(begin, chars); \
336 diag_if(DIAG_ERROR, errno2c(errno), at, \
337 "unable to duplicate string"); \
345 type result = strtov(buf, &endptr); \
346 chars = endptr - buf; \
350 if (errno == ERANGE && result == min) { \
351 set_errnum(ERRNUM_RANGE); \
352 diag_if(DIAG_WARNING, get_errc(), at, \
353 #type " underflow"); \
354 } else if (errno == ERANGE && result == max) { \
355 set_errnum(ERRNUM_RANGE); \
356 diag_if(DIAG_WARNING, get_errc(), at, \
357 #type " overflow"); \
358 } else if (!errno) { \
365 return floc_lex(at, begin, begin + chars); \
368#define LELY_UTIL_DEFINE_LEX_UNSIGNED(type, suffix, strtov, max, pname) \
369 size_t lex_c99_##suffix(const char *begin, const char *end, \
370 struct floc *at, type *pname) \
372 size_t chars = lex_c99_pp_num(begin, end, NULL); \
376 char *buf = strndup(begin, chars); \
378 diag_if(DIAG_ERROR, errno2c(errno), at, \
379 "unable to duplicate string"); \
387 type result = strtov(buf, &endptr); \
388 chars = endptr - buf; \
392 if (errno == ERANGE && result == max) { \
393 set_errnum(ERRNUM_RANGE); \
394 diag_if(DIAG_WARNING, get_errc(), at, \
395 #type " overflow"); \
396 } else if (!errno) { \
403 return floc_lex(at, begin, begin + chars); \
406#define strtov(nptr, endptr) strtol(nptr, endptr, 0)
407LELY_UTIL_DEFINE_LEX_SIGNED(
long,
long, strtov, LONG_MIN, LONG_MAX, pl)
409#define strtov(nptr, endptr) strtoul(nptr, endptr, 0)
410LELY_UTIL_DEFINE_LEX_UNSIGNED(
unsigned long, ulong, strtov, ULONG_MAX, pul)
412#define strtov(nptr, endptr) strtoll(nptr, endptr, 0)
413LELY_UTIL_DEFINE_LEX_SIGNED(
long long, llong, strtov, LLONG_MIN, LLONG_MAX, pll)
415#define strtov(nptr, endptr) strtoull(nptr, endptr, 0)
416LELY_UTIL_DEFINE_LEX_UNSIGNED(
417 unsigned long long, ullong, strtov, ULLONG_MAX, pull)
419LELY_UTIL_DEFINE_LEX_SIGNED(
float, flt, strtof, -HUGE_VALF, HUGE_VALF, pf)
420LELY_UTIL_DEFINE_LEX_SIGNED(
double, dbl, strtod, -HUGE_VAL, HUGE_VAL, pd)
421LELY_UTIL_DEFINE_LEX_SIGNED(
422 long double, ldbl, strtold, -HUGE_VALL, HUGE_VALL, pld)
424#undef LELY_UTIL_DEFINE_LEX_UNSIGNED
425#undef LELY_UTIL_DEFINE_LEX_SIGNED
434 if (i8 < INT_LEAST8_MIN) {
439 }
else if (i8 > INT_LEAST8_MAX) {
446 *pi8 = (int_least8_t)i8;
458 if (i16 < INT_LEAST16_MIN) {
459 i16 = INT_LEAST16_MIN;
462 "int16_t underflow");
463 }
else if (i16 > INT_LEAST16_MAX) {
464 i16 = INT_LEAST16_MAX;
470 *pi16 = (int_least16_t)i16;
485 if (i32 < INT_LEAST32_MIN) {
486 i32 = INT_LEAST32_MIN;
490 "int32_t underflow");
494 }
else if (i32 > INT_LEAST32_MAX) {
495 i32 = INT_LEAST32_MAX;
502 *pi32 = (int_least32_t)i32;
523 || i64 < INT_LEAST64_MIN) {
524 i64 = INT_LEAST64_MIN;
528 "int64_t underflow");
533 || i64 > INT_LEAST64_MAX) {
534 i64 = INT_LEAST64_MAX;
541 *pi64 = (int_least64_t)i64;
553 if (u8 > UINT_LEAST8_MAX) {
554 u8 = UINT_LEAST8_MAX;
560 *pu8 = (uint_least8_t)u8;
567 uint_least16_t *pu16)
572 if (u16 > UINT_LEAST16_MAX) {
573 u16 = UINT_LEAST16_MAX;
576 "uint16_t overflow");
579 *pu16 = (uint_least16_t)u16;
586 uint_least32_t *pu32)
594 if (u32 > UINT_LEAST32_MAX) {
595 u32 = UINT_LEAST32_MAX;
599 "uint32_t overflow");
602 *pu32 = (uint_least32_t)u32;
609 uint_least64_t *pu64)
615 unsigned long long u64;
623 || u64 > UINT_LEAST64_MAX) {
624 u64 = UINT_LEAST64_MAX;
628 "uint64_t overflow");
631 *pu64 = (uint_least64_t)u64;
642 const char *cp = begin;
644 if (delim && *delim) {
645 size_t n = strlen(delim);
646 if ((end && cp + n > end) || strncmp(delim, cp, n))
652 while ((!end || cp < end) && *cp && !
isbreak((
unsigned char)*cp))
659lex_hex(
const char *begin,
const char *end,
struct floc *at,
void *ptr,
664 const char *cp = begin;
666 unsigned char *bp = ptr;
667 unsigned char *endb = bp + (ptr && pn ? *pn : 0);
670 for (; (!end || cp < end) && isxdigit((
unsigned char)*cp); cp++, i++) {
671 if (bp && bp < endb) {
674 *bp++ |=
ctox(*cp) & 0xf;
676 *bp =
ctox(*cp) & 0xf;
689lex_base64(
const char *begin,
const char *end,
struct floc *at,
void *ptr,
694 const char *cp = begin;
696 unsigned char *bp = ptr;
697 unsigned char *endb = bp + (ptr && pn ? *pn : 0);
701 while ((!end || cp < end) && *cp) {
704 case 'A': b = 0;
break;
705 case 'B': b = 1;
break;
706 case 'C': b = 2;
break;
707 case 'D': b = 3;
break;
708 case 'E': b = 4;
break;
709 case 'F': b = 5;
break;
710 case 'G': b = 6;
break;
711 case 'H': b = 7;
break;
712 case 'I': b = 8;
break;
713 case 'J': b = 9;
break;
714 case 'K': b = 10;
break;
715 case 'L': b = 11;
break;
716 case 'M': b = 12;
break;
717 case 'N': b = 13;
break;
718 case 'O': b = 14;
break;
719 case 'P': b = 15;
break;
720 case 'Q': b = 16;
break;
721 case 'R': b = 17;
break;
722 case 'S': b = 18;
break;
723 case 'T': b = 19;
break;
724 case 'U': b = 20;
break;
725 case 'V': b = 21;
break;
726 case 'W': b = 22;
break;
727 case 'X': b = 23;
break;
728 case 'Y': b = 24;
break;
729 case 'Z': b = 25;
break;
730 case 'a': b = 26;
break;
731 case 'b': b = 27;
break;
732 case 'c': b = 28;
break;
733 case 'd': b = 29;
break;
734 case 'e': b = 30;
break;
735 case 'f': b = 31;
break;
736 case 'g': b = 32;
break;
737 case 'h': b = 33;
break;
738 case 'i': b = 34;
break;
739 case 'j': b = 35;
break;
740 case 'k': b = 36;
break;
741 case 'l': b = 37;
break;
742 case 'm': b = 38;
break;
743 case 'n': b = 39;
break;
744 case 'o': b = 40;
break;
745 case 'p': b = 41;
break;
746 case 'q': b = 42;
break;
747 case 'r': b = 43;
break;
748 case 's': b = 44;
break;
749 case 't': b = 45;
break;
750 case 'u': b = 46;
break;
751 case 'v': b = 47;
break;
752 case 'w': b = 48;
break;
753 case 'x': b = 49;
break;
754 case 'y': b = 50;
break;
755 case 'z': b = 51;
break;
756 case '0': b = 52;
break;
757 case '1': b = 53;
break;
758 case '2': b = 54;
break;
759 case '3': b = 55;
break;
760 case '4': b = 56;
break;
761 case '5': b = 57;
break;
762 case '6': b = 58;
break;
763 case '7': b = 59;
break;
764 case '8': b = 60;
break;
765 case '9': b = 61;
break;
766 case '+': b = 62;
break;
767 case '/': b = 63;
break;
770 if (bp && bp < endb) {
772 case 0: s = b << 2;
break;
This header file is part of the utilities library; it contains the diagnostic declarations.
size_t floc_lex(struct floc *at, const char *begin, const char *end)
Increments a file location by reading characters from a memory buffer.
void diag_if(enum diag_severity severity, int errc, const struct floc *at, const char *format,...)
Emits a diagnostic message occurring at a location in a text file.
@ ERRNUM_RANGE
Result too large.
int get_errc(void)
Returns the last (thread-specific) native error code set by a system call or library function.
errnum_t get_errnum(void)
Returns the last (thread-specific) platform-independent error number set by a system call or library function.
void set_errnum(errnum_t errnum)
Sets the current (thread-specific) platform-independent error number to errnum.
#define MIN(a, b)
Returns the minimum of a and b.
size_t lex_c99_i16(const char *begin, const char *end, struct floc *at, int_least16_t *pi16)
Lexes a C99 int_least16_t from a memory buffer.
size_t lex_c99_llong(const char *begin, const char *end, struct floc *at, long long *pll)
Lexes a C99 long long from a memory buffer.
size_t lex_char(int c, const char *begin, const char *end, struct floc *at)
Lexes the specified character from a memory buffer.
size_t lex_ctype(int(*ctype)(int), const char *begin, const char *end, struct floc *at)
Greedily lexes a sequence of characters of the specified class from a memory buffer.
size_t lex_utf8(const char *begin, const char *end, struct floc *at, char32_t *pc32)
Lexes a UTF-8 encoded Unicode character from a memory buffer.
size_t lex_c99_id(const char *begin, const char *end, struct floc *at, char *s, size_t *pn)
Lexes a C99 identifier from a memory buffer.
size_t lex_c99_long(const char *begin, const char *end, struct floc *at, long *pl)
Lexes a C99 long from a memory buffer.
size_t lex_line_comment(const char *delim, const char *begin, const char *end, struct floc *at)
Lexes a single line-comment (excluding the line break) starting with the specified delimiter from a memory buffer.
size_t lex_c99_u8(const char *begin, const char *end, struct floc *at, uint_least8_t *pu8)
Lexes a C99 uint_least8_t from a memory buffer.
size_t lex_c99_esc(const char *begin, const char *end, struct floc *at, char32_t *pc32)
Lexes a C99 character escape sequence from a memory buffer if the buffer begins with '\', and a UTF-8 encoded Unicode character otherwise.
size_t lex_hex(const char *begin, const char *end, struct floc *at, void *ptr, size_t *pn)
Lexes and decodes the hexadecimal representation of binary data from a memory buffer.
size_t lex_base64(const char *begin, const char *end, struct floc *at, void *ptr, size_t *pn)
Lexes and decodes the Base64 representation of binary data from a memory buffer.
size_t lex_c99_str(const char *begin, const char *end, struct floc *at, char *s, size_t *pn)
Lexes a UTF-8 encoded Unicode string from a memory buffer.
size_t lex_c99_i8(const char *begin, const char *end, struct floc *at, int_least8_t *pi8)
Lexes a C99 int_least8_t from a memory buffer.
size_t lex_c99_u32(const char *begin, const char *end, struct floc *at, uint_least32_t *pu32)
Lexes a C99 uint_least32_t from a memory buffer.
size_t lex_c99_u64(const char *begin, const char *end, struct floc *at, uint_least64_t *pu64)
Lexes a C99 uint_least64_t from a memory buffer.
size_t lex_c99_i32(const char *begin, const char *end, struct floc *at, int_least32_t *pi32)
Lexes a C99 int_least32_t from a memory buffer.
size_t lex_break(const char *begin, const char *end, struct floc *at)
Lexes a single line break from a memory buffer.
size_t lex_c99_u16(const char *begin, const char *end, struct floc *at, uint_least16_t *pu16)
Lexes a C99 uint_least16_t from a memory buffer.
size_t lex_c99_ulong(const char *begin, const char *end, struct floc *at, unsigned long *pul)
Lexes a C99 unsigned long from a memory buffer.
size_t lex_c99_pp_num(const char *begin, const char *end, struct floc *at)
Lexes a C99 preprocessing number from a memory buffer.
size_t lex_c99_ullong(const char *begin, const char *end, struct floc *at, unsigned long long *pull)
Lexes a C99 unsigned long long from a memory buffer.
size_t lex_c99_i64(const char *begin, const char *end, struct floc *at, int_least64_t *pi64)
Lexes a C99 int_least64_t from a memory buffer.
This header file is part of the utilities library; it contains the lexer function declarations.
int isodigit(int c)
Returns 1 if c is an octal digit, and 0 otherwise.
int isbreak(int c)
Returns 1 if c is a line break character, and 0 otherwise.
int ctoo(int c)
Returns the octal digit corresponding to the character c.
int ctox(int c)
Returns the hexadecimal digit corresponding to the character c.
This header file is part of the utilities library; it contains the printing function declarations.
size_t print_utf8(char **pbegin, char *end, char32_t c32)
Prints a UTF-8 encoded Unicode character to a memory buffer.
This is the internal header file of the utilities library.
This header file is part of the C11 and POSIX compatibility library; it includes <stdlib.h>.
This header file is part of the C11 and POSIX compatibility library; it includes <string.h>.
A location in a text file.
This header file is part of the C11 and POSIX compatibility library; it includes <uchar.h>.