Lely core libraries
2.2.5
|
Go to the documentation of this file.
25 #define LELY_UTIL_LEX_INLINE extern inline
38 lex_char(
int c,
const char *begin,
const char *end,
struct floc *at)
42 const char *cp = begin;
44 if ((end && cp >= end) || *cp++ != c)
47 return floc_lex(at, begin, begin + 1);
51 lex_ctype(
int (*ctype)(
int),
const char *begin,
const char *end,
57 const char *cp = begin;
59 while ((!end || cp < end) && ctype((
unsigned char)*cp))
70 const char *cp = begin;
72 if ((end && cp >= end) || !
isbreak((
unsigned char)*cp))
76 if (*cp++ ==
'\r' && (!end || cp < end) && *cp ==
'\n')
/*
 * Lexes a UTF-8 encoded Unicode character from the memory buffer starting at
 * `begin`. When `end` is not NULL, the visible bounds checks treat it as the
 * first byte that must not be read.
 * NOTE(review): this is a line-sampled listing; the statements between the
 * numbered lines (including the return paths) are not visible here.
 */
83 lex_utf8(
const char *begin,
const char *end,
struct floc *at, char32_t *pc32)
87 const char *cp = begin;
// Fetch the lead byte as unsigned so the bit-mask tests below are well defined.
95 unsigned char c = *cp++;
100 }
else if ((c & 0xc0) == 0x80) {
// Lead byte has the form 10xxxxxx (a continuation byte). The loop condition
// scans over further continuation bytes while staying inside the buffer.
105 while ((!end || cp < end)
106 && ((
unsigned char)*cp & 0xc0) == 0x80)
109 "a UTF-8 sequence cannot begin with a continuation byte");
111 }
else if ((c & 0xe0) == 0xc0) {
115 }
else if ((c & 0xf0) == 0xe0) {
119 }
else if ((c & 0xf8) == 0xf0) {
// NOTE(review): the other visible bounds checks in this listing reject
// `cp >= end`. With `cp > end` this test passes when cp == end, and `*cp` is
// then read one byte past the buffer. Presumably this should be `cp >= end`;
// confirm against the upstream source.
131 if ((end && cp > end) || ((
unsigned char)*cp & 0xc0) != 0x80)
// Shift in the low six payload bits of the continuation byte.
133 c32 = (c32 << 6) | ((
unsigned char)*cp & 0x3f);
// Surrogates (U+D800..U+DFFF) and values above U+10FFFF are diagnosed as
// illegal code points.
139 if ((c32 >= 0xd800 && c32 <= 0xdfff) || c32 > 0x10ffff) {
141 "illegal Unicode code point U+%" PRIX32, c32);
164 const char *cp = begin;
166 if ((end && cp >= end) || !(*cp ==
'_' || isalpha((
unsigned char)*cp)))
170 while ((!end || cp < end)
171 && (*cp ==
'_' || isalnum((
unsigned char)*cp)))
176 memcpy(s, begin,
MIN((
size_t)(cp - begin), *pn));
188 const char *cp = begin;
190 if (end && cp >= end)
194 return lex_utf8(begin, end, at, pc32);
197 if (end && cp >= end)
204 while ((!end || cp < end) &&
isodigit((
unsigned char)*cp)) {
205 c32 = (c32 << 3) |
ctoo(*cp);
210 case '\'': c32 =
'\'';
break;
211 case '\"': c32 =
'\"';
break;
212 case '\\': c32 =
'\\';
break;
213 case 'a': c32 =
'\a';
break;
214 case 'b': c32 =
'\b';
break;
215 case 'f': c32 =
'\f';
break;
216 case 'n': c32 =
'\n';
break;
217 case 'r': c32 =
'\r';
break;
218 case 't': c32 =
'\t';
break;
219 case 'v': c32 =
'\v';
break;
221 while ((!end || cp < end)
222 && isxdigit((
unsigned char)*cp)) {
223 c32 = (c32 << 4) |
ctox(*cp);
230 isgraph((
unsigned char)*cp)
231 ?
"illegal escape sequence '\\%c'"
232 :
"illegal escape sequence '\\\\%o'",
259 const char *cp = begin;
262 char *ends = s + (s && pn ? *pn : 0);
264 while ((!end || cp < end) && *cp && *cp !=
'\"'
265 && !
isbreak((
unsigned char)*cp)) {
289 const char *cp = begin;
292 if ((!end || cp < end) && (*cp ==
'+' || *cp ==
'-'))
297 if ((!end || cp < end) && isdigit((
unsigned char)*cp)) {
299 }
else if ((!end || end - cp >= 2) && cp[0] ==
'.'
300 && isdigit((
unsigned char)cp[1])) {
306 while ((!end || cp < end) && *cp) {
307 if (*cp ==
'e' || *cp ==
'E' || *cp ==
'p' || *cp ==
'P') {
310 if ((!end || cp < end) && (*cp ==
'+' || *cp ==
'-'))
312 }
else if (*cp ==
'.' || *cp ==
'_'
313 || isalnum((
unsigned char)*cp)) {
323 #define LELY_UTIL_DEFINE_LEX_SIGNED(type, suffix, strtov, min, max, pname) \
324 size_t lex_c99_##suffix(const char *begin, const char *end, \
325 struct floc *at, type *pname) \
327 size_t chars = lex_c99_pp_num(begin, end, NULL); \
331 char *buf = strndup(begin, chars); \
333 diag_if(DIAG_ERROR, errno2c(errno), at, \
334 "unable to duplicate string"); \
342 type result = strtov(buf, &endptr); \
343 chars = endptr - buf; \
347 if (errno == ERANGE && result == min) { \
348 set_errnum(ERRNUM_RANGE); \
349 diag_if(DIAG_WARNING, get_errc(), at, \
350 #type " underflow"); \
351 } else if (errno == ERANGE && result == max) { \
352 set_errnum(ERRNUM_RANGE); \
353 diag_if(DIAG_WARNING, get_errc(), at, \
354 #type " overflow"); \
355 } else if (!errno) { \
362 return floc_lex(at, begin, begin + chars); \
365 #define LELY_UTIL_DEFINE_LEX_UNSIGNED(type, suffix, strtov, max, pname) \
366 size_t lex_c99_##suffix(const char *begin, const char *end, \
367 struct floc *at, type *pname) \
369 size_t chars = lex_c99_pp_num(begin, end, NULL); \
373 char *buf = strndup(begin, chars); \
375 diag_if(DIAG_ERROR, errno2c(errno), at, \
376 "unable to duplicate string"); \
384 type result = strtov(buf, &endptr); \
385 chars = endptr - buf; \
389 if (errno == ERANGE && result == max) { \
390 set_errnum(ERRNUM_RANGE); \
391 diag_if(DIAG_WARNING, get_errc(), at, \
392 #type " overflow"); \
393 } else if (!errno) { \
400 return floc_lex(at, begin, begin + chars); \
403 #define strtov(nptr, endptr) strtol(nptr, endptr, 0)
404 LELY_UTIL_DEFINE_LEX_SIGNED(
long,
long, strtov, LONG_MIN, LONG_MAX, pl)
406 #define strtov(nptr, endptr) strtoul(nptr, endptr, 0)
407 LELY_UTIL_DEFINE_LEX_UNSIGNED(
unsigned long, ulong, strtov, ULONG_MAX, pul)
409 #define strtov(nptr, endptr) strtoll(nptr, endptr, 0)
410 LELY_UTIL_DEFINE_LEX_SIGNED(
long long, llong, strtov, LLONG_MIN, LLONG_MAX, pll)
412 #define strtov(nptr, endptr) strtoull(nptr, endptr, 0)
413 LELY_UTIL_DEFINE_LEX_UNSIGNED(
414 unsigned long long, ullong, strtov, ULLONG_MAX, pull)
416 LELY_UTIL_DEFINE_LEX_SIGNED(
float, flt, strtof, -HUGE_VALF, HUGE_VALF, pf)
417 LELY_UTIL_DEFINE_LEX_SIGNED(
double, dbl, strtod, -HUGE_VAL, HUGE_VAL, pd)
418 LELY_UTIL_DEFINE_LEX_SIGNED(
419 long double, ldbl, strtold, -HUGE_VALL, HUGE_VALL, pld)
421 #undef LELY_UTIL_DEFINE_LEX_UNSIGNED
422 #undef LELY_UTIL_DEFINE_LEX_SIGNED
436 }
else if (i8 > INT8_MAX) {
443 *pi8 = (int_least8_t)i8;
455 if (i16 < INT16_MIN) {
459 "int16_t underflow");
460 }
else if (i16 > INT16_MAX) {
467 *pi16 = (int_least16_t)i16;
482 if (i32 < INT32_MIN) {
487 "int32_t underflow");
491 }
else if (i32 > INT32_MAX) {
499 *pi32 = (int_least32_t)i32;
520 || i64 < INT64_MIN) {
525 "int64_t underflow");
530 || i64 > INT64_MAX) {
538 *pi64 = (int_least64_t)i64;
550 if (u8 > UINT8_MAX) {
557 *pu8 = (uint_least8_t)u8;
564 uint_least16_t *pu16)
569 if (u16 > UINT16_MAX) {
573 "uint16_t overflow");
576 *pu16 = (uint_least16_t)u16;
583 uint_least32_t *pu32)
591 if (u32 > UINT32_MAX) {
596 "uint32_t overflow");
599 *pu32 = (uint_least32_t)u32;
606 uint_least64_t *pu64)
612 unsigned long long u64;
620 || u64 > UINT64_MAX) {
625 "uint64_t overflow");
628 *pu64 = (uint_least64_t)u64;
639 const char *cp = begin;
641 if (delim && *delim) {
642 size_t n = strlen(delim);
643 if ((end && cp + n > end) || strncmp(delim, cp, n))
649 while ((!end || cp < end) && *cp && !
isbreak((
unsigned char)*cp))
656 lex_hex(
const char *begin,
const char *end,
struct floc *at,
void *ptr,
661 const char *cp = begin;
663 unsigned char *bp = ptr;
664 unsigned char *endb = bp + (ptr && pn ? *pn : 0);
667 for (; (!end || cp < end) && isxdigit((
unsigned char)*cp); cp++, i++) {
668 if (bp && bp < endb) {
671 *bp++ |=
ctox(*cp) & 0xf;
673 *bp =
ctox(*cp) & 0xf;
686 lex_base64(
const char *begin,
const char *end,
struct floc *at,
void *ptr,
691 const char *cp = begin;
693 unsigned char *bp = ptr;
694 unsigned char *endb = bp + (ptr && pn ? *pn : 0);
698 while ((!end || cp < end) && *cp) {
701 case 'A': b = 0;
break;
702 case 'B': b = 1;
break;
703 case 'C': b = 2;
break;
704 case 'D': b = 3;
break;
705 case 'E': b = 4;
break;
706 case 'F': b = 5;
break;
707 case 'G': b = 6;
break;
708 case 'H': b = 7;
break;
709 case 'I': b = 8;
break;
710 case 'J': b = 9;
break;
711 case 'K': b = 10;
break;
712 case 'L': b = 11;
break;
713 case 'M': b = 12;
break;
714 case 'N': b = 13;
break;
715 case 'O': b = 14;
break;
716 case 'P': b = 15;
break;
717 case 'Q': b = 16;
break;
718 case 'R': b = 17;
break;
719 case 'S': b = 18;
break;
720 case 'T': b = 19;
break;
721 case 'U': b = 20;
break;
722 case 'V': b = 21;
break;
723 case 'W': b = 22;
break;
724 case 'X': b = 23;
break;
725 case 'Y': b = 24;
break;
726 case 'Z': b = 25;
break;
727 case 'a': b = 26;
break;
728 case 'b': b = 27;
break;
729 case 'c': b = 28;
break;
730 case 'd': b = 29;
break;
731 case 'e': b = 30;
break;
732 case 'f': b = 31;
break;
733 case 'g': b = 32;
break;
734 case 'h': b = 33;
break;
735 case 'i': b = 34;
break;
736 case 'j': b = 35;
break;
737 case 'k': b = 36;
break;
738 case 'l': b = 37;
break;
739 case 'm': b = 38;
break;
740 case 'n': b = 39;
break;
741 case 'o': b = 40;
break;
742 case 'p': b = 41;
break;
743 case 'q': b = 42;
break;
744 case 'r': b = 43;
break;
745 case 's': b = 44;
break;
746 case 't': b = 45;
break;
747 case 'u': b = 46;
break;
748 case 'v': b = 47;
break;
749 case 'w': b = 48;
break;
750 case 'x': b = 49;
break;
751 case 'y': b = 50;
break;
752 case 'z': b = 51;
break;
753 case '0': b = 52;
break;
754 case '1': b = 53;
break;
755 case '2': b = 54;
break;
756 case '3': b = 55;
break;
757 case '4': b = 56;
break;
758 case '5': b = 57;
break;
759 case '6': b = 58;
break;
760 case '7': b = 59;
break;
761 case '8': b = 60;
break;
762 case '9': b = 61;
break;
763 case '+': b = 62;
break;
764 case '/': b = 63;
break;
767 if (bp && bp < endb) {
769 case 0: s = b << 2;
break;
size_t lex_c99_long(const char *begin, const char *end, struct floc *at, long *pl)
Lexes a C99 long from a memory buffer.
size_t lex_c99_u32(const char *begin, const char *end, struct floc *at, uint_least32_t *pu32)
Lexes a C99 uint_least32_t from a memory buffer.
int isodigit(int c)
Returns 1 if c is an octal digit, and 0 otherwise.
int ctoo(int c)
Returns the octal digit corresponding to the character c.
#define MIN(a, b)
Returns the minimum of a and b.
size_t lex_c99_u8(const char *begin, const char *end, struct floc *at, uint_least8_t *pu8)
Lexes a C99 uint_least8_t from a memory buffer.
size_t lex_utf8(const char *begin, const char *end, struct floc *at, char32_t *pc32)
Lexes a UTF-8 encoded Unicode character from a memory buffer.
size_t lex_c99_id(const char *begin, const char *end, struct floc *at, char *s, size_t *pn)
Lexes a C99 identifier from a memory buffer.
int get_errc(void)
Returns the last (thread-specific) native error code set by a system call or library function.
size_t lex_c99_ulong(const char *begin, const char *end, struct floc *at, unsigned long *pul)
Lexes a C99 unsigned long from a memory buffer.
size_t lex_hex(const char *begin, const char *end, struct floc *at, void *ptr, size_t *pn)
Lexes and decodes the hexadecimal representation of binary data from a memory buffer.
size_t lex_c99_i32(const char *begin, const char *end, struct floc *at, int_least32_t *pi32)
Lexes a C99 int_least32_t from a memory buffer.
struct floc
A location in a text file.
size_t lex_ctype(int(*ctype)(int), const char *begin, const char *end, struct floc *at)
Greedily lexes a sequence of characters of the specified class from a memory buffer.
int isbreak(int c)
Returns 1 if c is a line break character, and 0 otherwise.
size_t floc_lex(struct floc *at, const char *begin, const char *end)
Increments a file location by reading characters from a memory buffer.
size_t lex_char(int c, const char *begin, const char *end, struct floc *at)
Lexes the specified character from a memory buffer.
ERRNUM_RANGE
Result too large.
size_t lex_c99_u16(const char *begin, const char *end, struct floc *at, uint_least16_t *pu16)
Lexes a C99 uint_least16_t from a memory buffer.
size_t lex_line_comment(const char *delim, const char *begin, const char *end, struct floc *at)
Lexes a single line-comment (excluding the line break) starting with the specified delimiter from a memory buffer.
void set_errnum(errnum_t errnum)
Sets the current (thread-specific) platform-independent error number to errnum.
size_t lex_break(const char *begin, const char *end, struct floc *at)
Lexes a single line break from a memory buffer.
size_t lex_c99_u64(const char *begin, const char *end, struct floc *at, uint_least64_t *pu64)
Lexes a C99 uint_least64_t from a memory buffer.
size_t lex_c99_ullong(const char *begin, const char *end, struct floc *at, unsigned long long *pull)
Lexes a C99 unsigned long long from a memory buffer.
int ctox(int c)
Returns the hexadecimal digit corresponding to the character c.
size_t lex_c99_str(const char *begin, const char *end, struct floc *at, char *s, size_t *pn)
Lexes a UTF-8 encoded Unicode string from a memory buffer.
size_t lex_c99_pp_num(const char *begin, const char *end, struct floc *at)
Lexes a C99 preprocessing number from a memory buffer.
size_t lex_c99_i64(const char *begin, const char *end, struct floc *at, int_least64_t *pi64)
Lexes a C99 int_least64_t from a memory buffer.
size_t lex_c99_i16(const char *begin, const char *end, struct floc *at, int_least16_t *pi16)
Lexes a C99 int_least16_t from a memory buffer.
size_t lex_c99_i8(const char *begin, const char *end, struct floc *at, int_least8_t *pi8)
Lexes a C99 int_least8_t from a memory buffer.
size_t lex_c99_llong(const char *begin, const char *end, struct floc *at, long long *pll)
Lexes a C99 long long from a memory buffer.
size_t print_utf8(char **pbegin, char *end, char32_t c32)
Prints a UTF-8 encoded Unicode character to a memory buffer.
void diag_if(enum diag_severity severity, int errc, const struct floc *at, const char *format,...)
Emits a diagnostic message occurring at a location in a text file.
errnum_t get_errnum(void)
Returns the last (thread-specific) platform-independent error number set by a system call or library function.
size_t lex_c99_esc(const char *begin, const char *end, struct floc *at, char32_t *pc32)
Lexes a C99 character escape sequence from a memory buffer if the buffer begins with '\', and a UTF-8 encoded Unicode character otherwise.
size_t lex_base64(const char *begin, const char *end, struct floc *at, void *ptr, size_t *pn)
Lexes and decodes the Base64 representation of binary data from a memory buffer.