25 #define LELY_UTIL_LEX_INLINE extern inline
38 lex_char(
int c,
const char *begin,
const char *end,
struct floc *at)
42 const char *cp = begin;
44 if ((end && cp >= end) || *cp++ != c)
47 return floc_lex(at, begin, begin + 1);
51 lex_ctype(
int (*ctype)(
int),
const char *begin,
const char *end,
57 const char *cp = begin;
59 while ((!end || cp < end) && ctype((
unsigned char)*cp))
70 const char *cp = begin;
72 if ((end && cp >= end) || !
isbreak((
unsigned char)*cp))
76 if (*cp++ ==
'\r' && (!end || cp < end) && *cp ==
'\n')
83 lex_utf8(
const char *begin,
const char *end,
struct floc *at, char32_t *pc32)
87 const char *cp = begin;
95 unsigned char c = *cp++;
100 }
else if ((c & 0xc0) == 0x80) {
105 while ((!end || cp < end)
106 && ((
unsigned char)*cp & 0xc0) == 0x80)
109 "a UTF-8 sequence cannot begin with a continuation byte");
111 }
else if ((c & 0xe0) == 0xc0) {
115 }
else if ((c & 0xf0) == 0xe0) {
119 }
else if ((c & 0xf8) == 0xf0) {
131 if ((end && cp > end) || ((
unsigned char)*cp & 0xc0) != 0x80)
133 c32 = (c32 << 6) | ((
unsigned char)*cp & 0x3f);
139 if ((c32 >= 0xd800 && c32 <= 0xdfff) || c32 > 0x10ffff) {
141 "illegal Unicode code point U+%" PRIX32, c32);
164 const char *cp = begin;
166 if ((end && cp >= end) || !(*cp ==
'_' || isalpha((
unsigned char)*cp)))
170 while ((!end || cp < end)
171 && (*cp ==
'_' || isalnum((
unsigned char)*cp)))
176 memcpy(s, begin,
MIN((
size_t)(cp - begin), *pn));
188 const char *cp = begin;
190 if (end && cp >= end)
194 return lex_utf8(begin, end, at, pc32);
197 if (end && cp >= end)
204 while ((!end || cp < end) &&
isodigit((
unsigned char)*cp)) {
205 c32 = (c32 << 3) |
ctoo(*cp);
210 case '\'': c32 =
'\'';
break;
211 case '\"': c32 =
'\"';
break;
212 case '\\': c32 =
'\\';
break;
213 case 'a': c32 =
'\a';
break;
214 case 'b': c32 =
'\b';
break;
215 case 'f': c32 =
'\f';
break;
216 case 'n': c32 =
'\n';
break;
217 case 'r': c32 =
'\r';
break;
218 case 't': c32 =
'\t';
break;
219 case 'v': c32 =
'\v';
break;
221 while ((!end || cp < end)
222 && isxdigit((
unsigned char)*cp)) {
223 c32 = (c32 << 4) |
ctox(*cp);
230 isgraph((
unsigned char)*cp)
231 ?
"illegal escape sequence '\\%c'"
232 :
"illegal escape sequence '\\\\%o'",
259 const char *cp = begin;
262 char *ends = s + (s && pn ? *pn : 0);
264 while ((!end || cp < end) && *cp && *cp !=
'\"'
265 && !
isbreak((
unsigned char)*cp)) {
289 const char *cp = begin;
292 if ((!end || cp < end) && (*cp ==
'+' || *cp ==
'-'))
297 if ((!end || cp < end) && isdigit((
unsigned char)*cp)) {
299 }
else if ((!end || end - cp >= 2) && cp[0] ==
'.'
300 && isdigit((
unsigned char)cp[1])) {
306 while ((!end || cp < end) && *cp) {
307 if (*cp ==
'e' || *cp ==
'E' || *cp ==
'p' || *cp ==
'P') {
310 if ((!end || cp < end) && (*cp ==
'+' || *cp ==
'-'))
312 }
else if (*cp ==
'.' || *cp ==
'_'
313 || isalnum((
unsigned char)*cp)) {
323 #define LELY_UTIL_DEFINE_LEX_SIGNED(type, suffix, strtov, min, max, pname) \
324 size_t lex_c99_##suffix(const char *begin, const char *end, \
325 struct floc *at, type *pname) \
327 size_t chars = lex_c99_pp_num(begin, end, NULL); \
331 char *buf = strndup(begin, chars); \
333 diag_if(DIAG_ERROR, errno2c(errno), at, \
334 "unable to duplicate string"); \
342 type result = strtov(buf, &endptr); \
343 chars = endptr - buf; \
347 if (errno == ERANGE && result == min) { \
348 set_errnum(ERRNUM_RANGE); \
349 diag_if(DIAG_WARNING, get_errc(), at, \
350 #type " underflow"); \
351 } else if (errno == ERANGE && result == max) { \
352 set_errnum(ERRNUM_RANGE); \
353 diag_if(DIAG_WARNING, get_errc(), at, \
354 #type " overflow"); \
355 } else if (!errno) { \
362 return floc_lex(at, begin, begin + chars); \
365 #define LELY_UTIL_DEFINE_LEX_UNSIGNED(type, suffix, strtov, max, pname) \
366 size_t lex_c99_##suffix(const char *begin, const char *end, \
367 struct floc *at, type *pname) \
369 size_t chars = lex_c99_pp_num(begin, end, NULL); \
373 char *buf = strndup(begin, chars); \
375 diag_if(DIAG_ERROR, errno2c(errno), at, \
376 "unable to duplicate string"); \
384 type result = strtov(buf, &endptr); \
385 chars = endptr - buf; \
389 if (errno == ERANGE && result == max) { \
390 set_errnum(ERRNUM_RANGE); \
391 diag_if(DIAG_WARNING, get_errc(), at, \
392 #type " overflow"); \
393 } else if (!errno) { \
400 return floc_lex(at, begin, begin + chars); \
403 #define strtov(nptr, endptr) strtol(nptr, endptr, 0)
404 LELY_UTIL_DEFINE_LEX_SIGNED(
long,
long, strtov, LONG_MIN, LONG_MAX, pl)
406 #define strtov(nptr, endptr) strtoul(nptr, endptr, 0)
407 LELY_UTIL_DEFINE_LEX_UNSIGNED(
unsigned long, ulong, strtov, ULONG_MAX, pul)
409 #define strtov(nptr, endptr) strtoll(nptr, endptr, 0)
410 LELY_UTIL_DEFINE_LEX_SIGNED(
long long, llong, strtov, LLONG_MIN, LLONG_MAX, pll)
412 #define strtov(nptr, endptr) strtoull(nptr, endptr, 0)
413 LELY_UTIL_DEFINE_LEX_UNSIGNED(
414 unsigned long long, ullong, strtov, ULLONG_MAX, pull)
416 LELY_UTIL_DEFINE_LEX_SIGNED(
float, flt, strtof, -HUGE_VALF, HUGE_VALF, pf)
417 LELY_UTIL_DEFINE_LEX_SIGNED(
double, dbl, strtod, -HUGE_VAL, HUGE_VAL, pd)
418 LELY_UTIL_DEFINE_LEX_SIGNED(
419 long double, ldbl, strtold, -HUGE_VALL, HUGE_VALL, pld)
421 #undef LELY_UTIL_DEFINE_LEX_UNSIGNED
422 #undef LELY_UTIL_DEFINE_LEX_SIGNED
436 }
else if (i8 > INT8_MAX) {
443 *pi8 = (int_least8_t)i8;
455 if (i16 < INT16_MIN) {
459 "int16_t underflow");
460 }
else if (i16 > INT16_MAX) {
467 *pi16 = (int_least16_t)i16;
482 if (i32 < INT32_MIN) {
487 "int32_t underflow");
491 }
else if (i32 > INT32_MAX) {
499 *pi32 = (int_least32_t)i32;
520 || i64 < INT64_MIN) {
525 "int64_t underflow");
530 || i64 > INT64_MAX) {
538 *pi64 = (int_least64_t)i64;
550 if (u8 > UINT8_MAX) {
557 *pu8 = (uint_least8_t)u8;
564 uint_least16_t *pu16)
569 if (u16 > UINT16_MAX) {
573 "uint16_t overflow");
576 *pu16 = (uint_least16_t)u16;
583 uint_least32_t *pu32)
591 if (u32 > UINT32_MAX) {
596 "uint32_t overflow");
599 *pu32 = (uint_least32_t)u32;
606 uint_least64_t *pu64)
612 unsigned long long u64;
620 || u64 > UINT64_MAX) {
625 "uint64_t overflow");
628 *pu64 = (uint_least64_t)u64;
639 const char *cp = begin;
641 if (delim && *delim) {
642 size_t n = strlen(delim);
643 if ((end && cp + n > end) || strncmp(delim, cp, n))
649 while ((!end || cp < end) && *cp && !
isbreak((
unsigned char)*cp))
656 lex_hex(
const char *begin,
const char *end,
struct floc *at,
void *ptr,
661 const char *cp = begin;
663 unsigned char *bp = ptr;
664 unsigned char *endb = bp + (ptr && pn ? *pn : 0);
667 for (; (!end || cp < end) && isxdigit((
unsigned char)*cp); cp++, i++) {
668 if (bp && bp < endb) {
671 *bp++ |=
ctox(*cp) & 0xf;
673 *bp =
ctox(*cp) & 0xf;
686 lex_base64(
const char *begin,
const char *end,
struct floc *at,
void *ptr,
691 const char *cp = begin;
693 unsigned char *bp = ptr;
694 unsigned char *endb = bp + (ptr && pn ? *pn : 0);
698 while ((!end || cp < end) && *cp) {
701 case 'A': b = 0;
break;
702 case 'B': b = 1;
break;
703 case 'C': b = 2;
break;
704 case 'D': b = 3;
break;
705 case 'E': b = 4;
break;
706 case 'F': b = 5;
break;
707 case 'G': b = 6;
break;
708 case 'H': b = 7;
break;
709 case 'I': b = 8;
break;
710 case 'J': b = 9;
break;
711 case 'K': b = 10;
break;
712 case 'L': b = 11;
break;
713 case 'M': b = 12;
break;
714 case 'N': b = 13;
break;
715 case 'O': b = 14;
break;
716 case 'P': b = 15;
break;
717 case 'Q': b = 16;
break;
718 case 'R': b = 17;
break;
719 case 'S': b = 18;
break;
720 case 'T': b = 19;
break;
721 case 'U': b = 20;
break;
722 case 'V': b = 21;
break;
723 case 'W': b = 22;
break;
724 case 'X': b = 23;
break;
725 case 'Y': b = 24;
break;
726 case 'Z': b = 25;
break;
727 case 'a': b = 26;
break;
728 case 'b': b = 27;
break;
729 case 'c': b = 28;
break;
730 case 'd': b = 29;
break;
731 case 'e': b = 30;
break;
732 case 'f': b = 31;
break;
733 case 'g': b = 32;
break;
734 case 'h': b = 33;
break;
735 case 'i': b = 34;
break;
736 case 'j': b = 35;
break;
737 case 'k': b = 36;
break;
738 case 'l': b = 37;
break;
739 case 'm': b = 38;
break;
740 case 'n': b = 39;
break;
741 case 'o': b = 40;
break;
742 case 'p': b = 41;
break;
743 case 'q': b = 42;
break;
744 case 'r': b = 43;
break;
745 case 's': b = 44;
break;
746 case 't': b = 45;
break;
747 case 'u': b = 46;
break;
748 case 'v': b = 47;
break;
749 case 'w': b = 48;
break;
750 case 'x': b = 49;
break;
751 case 'y': b = 50;
break;
752 case 'z': b = 51;
break;
753 case '0': b = 52;
break;
754 case '1': b = 53;
break;
755 case '2': b = 54;
break;
756 case '3': b = 55;
break;
757 case '4': b = 56;
break;
758 case '5': b = 57;
break;
759 case '6': b = 58;
break;
760 case '7': b = 59;
break;
761 case '8': b = 60;
break;
762 case '9': b = 61;
break;
763 case '+': b = 62;
break;
764 case '/': b = 63;
break;
767 if (bp && bp < endb) {
769 case 0: s = b << 2;
break;
This header file is part of the utilities library; it contains the diagnostic declarations.
size_t floc_lex(struct floc *at, const char *begin, const char *end)
Increments a file location by reading characters from a memory buffer.
void diag_if(enum diag_severity severity, int errc, const struct floc *at, const char *format,...)
Emits a diagnostic message occurring at a location in a text file.
@ ERRNUM_RANGE
Result too large.
int get_errc(void)
Returns the last (thread-specific) native error code set by a system call or library function.
errnum_t get_errnum(void)
Returns the last (thread-specific) platform-independent error number set by a system call or library ...
void set_errnum(errnum_t errnum)
Sets the current (thread-specific) platform-independent error number to errnum.
#define MIN(a, b)
Returns the minimum of a and b.
size_t lex_c99_i16(const char *begin, const char *end, struct floc *at, int_least16_t *pi16)
Lexes a C99 int_least16_t from a memory buffer.
size_t lex_c99_llong(const char *begin, const char *end, struct floc *at, long long *pll)
Lexes a C99 long long from a memory buffer.
size_t lex_char(int c, const char *begin, const char *end, struct floc *at)
Lexes the specified character from a memory buffer.
size_t lex_ctype(int(*ctype)(int), const char *begin, const char *end, struct floc *at)
Greedily lexes a sequence of characters of the specified class from a memory buffer.
size_t lex_utf8(const char *begin, const char *end, struct floc *at, char32_t *pc32)
Lexes a UTF-8 encoded Unicode character from a memory buffer.
size_t lex_c99_id(const char *begin, const char *end, struct floc *at, char *s, size_t *pn)
Lexes a C99 identifier from a memory buffer.
size_t lex_c99_long(const char *begin, const char *end, struct floc *at, long *pl)
Lexes a C99 long from a memory buffer.
size_t lex_line_comment(const char *delim, const char *begin, const char *end, struct floc *at)
Lexes a single line-comment (excluding the line break) starting with the specified delimiter from a m...
size_t lex_c99_u8(const char *begin, const char *end, struct floc *at, uint_least8_t *pu8)
Lexes a C99 uint_least8_t from a memory buffer.
size_t lex_c99_esc(const char *begin, const char *end, struct floc *at, char32_t *pc32)
Lexes a C99 character escape sequence from a memory buffer if the buffer begins with '\',...
size_t lex_hex(const char *begin, const char *end, struct floc *at, void *ptr, size_t *pn)
Lexes and decodes the hexadecimal representation of binary data from a memory buffer.
size_t lex_base64(const char *begin, const char *end, struct floc *at, void *ptr, size_t *pn)
Lexes and decodes the Base64 representation of binary data from a memory buffer.
size_t lex_c99_str(const char *begin, const char *end, struct floc *at, char *s, size_t *pn)
Lexes a UTF-8 encoded Unicode string from a memory buffer.
size_t lex_c99_i8(const char *begin, const char *end, struct floc *at, int_least8_t *pi8)
Lexes a C99 int_least8_t from a memory buffer.
size_t lex_c99_u32(const char *begin, const char *end, struct floc *at, uint_least32_t *pu32)
Lexes a C99 uint_least32_t from a memory buffer.
size_t lex_c99_u64(const char *begin, const char *end, struct floc *at, uint_least64_t *pu64)
Lexes a C99 uint_least64_t from a memory buffer.
size_t lex_c99_i32(const char *begin, const char *end, struct floc *at, int_least32_t *pi32)
Lexes a C99 int_least32_t from a memory buffer.
size_t lex_break(const char *begin, const char *end, struct floc *at)
Lexes a single line break from a memory buffer.
size_t lex_c99_u16(const char *begin, const char *end, struct floc *at, uint_least16_t *pu16)
Lexes a C99 uint_least16_t from a memory buffer.
size_t lex_c99_ulong(const char *begin, const char *end, struct floc *at, unsigned long *pul)
Lexes a C99 unsigned long from a memory buffer.
size_t lex_c99_pp_num(const char *begin, const char *end, struct floc *at)
Lexes a C99 preprocessing number from a memory buffer.
size_t lex_c99_ullong(const char *begin, const char *end, struct floc *at, unsigned long long *pull)
Lexes a C99 unsigned long long from a memory buffer.
size_t lex_c99_i64(const char *begin, const char *end, struct floc *at, int_least64_t *pi64)
Lexes a C99 int_least64_t from a memory buffer.
This header file is part of the utilities library; it contains the lexer function declarations.
int isodigit(int c)
Returns 1 if c is an octal digit, and 0 otherwise.
int isbreak(int c)
Returns 1 if c is a line break character, and 0 otherwise.
int ctoo(int c)
Returns the octal digit corresponding to the character c.
int ctox(int c)
Returns the hexadecimal digit corresponding to the character c.
This header file is part of the utilities library; it contains the printing function declarations.
size_t print_utf8(char **pbegin, char *end, char32_t c32)
Prints a UTF-8 encoded Unicode character to a memory buffer.
This is the internal header file of the utilities library.
This header file is part of the C11 and POSIX compatibility library; it includes <stdlib....
This header file is part of the C11 and POSIX compatibility library; it includes <string....
A location in a text file.
This header file is part of the C11 and POSIX compatibility library; it includes <uchar....