parsing C is annoying

This commit is contained in:
pommicket 2022-02-03 19:19:12 -05:00
parent 04c5e1af8a
commit 3e6cacfb7c
2 changed files with 67 additions and 52 deletions

101
05/main.c
View file

@ -1,54 +1,57 @@
/*typedef struct {
typedef struct {
int i[41];
long double d;
} (*x)(void);
} (*x___)(void);
/* typedef enum X { */
/* R,S,T */
/* } *Foo[sizeof(unsigned long)]; */
/* typedef int A[T]; */
/* */
/* typedef struct A { */
/* int x, y; */
/* long double c; */
/* unsigned long d; */
/* char e[3]; */
/* long f; */
/* } A; */
/* */
/* typedef union B{ */
/* int x; */
/* struct { */
/* int y; */
/* struct {long z; } c; */
/* } c; */
/* }B; */
/* */
/* typedef int x[sizeof(A)+sizeof"hello"]; */
/* typedef int y[sizeof(struct B)]; */
/* */
/* static unsigned int x={55}; */
/* static char *s = "hello"; */
/* static char *t = "goodbye"; */
/* static char u[8] = "hellothe"; */
/* static char v[100] = "re my"; */
/* static char w[] = "friendly"; */
/* static char x_[] = "hi"; */
/* typedef int A[sizeof x_ + sizeof u]; */
/* */
/* static int a[5] = {1,2,3}; */
/* static char b[6][7] = {{'a'},{'b'},{'c'},{'d'},{'e'}}; */
/* static char __b[][7] = {{'a'},"hello",'r'}; */
/* static int _u = sizeof __b; */
typedef enum X {
R,S,T
} *Foo[sizeof(unsigned long)];
typedef int A___[T];
/* struct { */
/* int a; */
/* long b; */
/* } x1[] = {0x1234567890, 1ul<<60|1ul<<3, 77}; */
/* int y1 = 0x12345678; */
typedef int R[sizeof *(int*)"hello"];
typedef struct A {
int x, y;
long double c;
unsigned long d;
char e[3];
long f;
} A;
/* struct { */
/* int x[2], y; */
/* } test = {3, 5}; */
/* unsigned marker = 0xdeadbeef; */
typedef union B{
int x;
struct {
int y;
struct {long z; } c;
} c;
}B;
typedef int QQQ[sizeof(A)+sizeof"hello"];
typedef int RRR[sizeof(struct B)];
static unsigned int x={55};
static char *s = "hello";
static char *t = "goodbye";
static char u[8] = "hellothe";
static char v[100] = "re my";
static char w[] = "friendly";
static char x_[] = "hi";
typedef int A_[sizeof x_ + sizeof u];
static int a[5] = {1,2,3};
static char b[6][7] = {{'a'},{'b'},{'c'},{'d'},{'e'}};
static char __b[][7] = {{'a'},"hello",'r'};
static int _u = sizeof __b;
struct {
int a;
long b;
} x1[] = {0x1234567890, 1ul<<60|1ul<<3, 77};
int y1 = 0x12345678;
struct {
int x[2], y;
} test[] = {3, 5,0x1234,0x4321};
typedef int Blah[sizeof((B *)0)->c.y];
unsigned marker = 0xdeadbeef;
/* typedef int X[sizeof(int)+4]; */

View file

@ -1329,7 +1329,7 @@ function parse_expression
local first_token
:parse_expression_top
print_tokens(tokens, tokens_end)
;print_tokens(tokens, tokens_end)
type = out + 4
@ -1676,8 +1676,15 @@ function parse_expression
parse_type_declarators(sizeof_prefix, sizeof_suffix, sizeof_suffix, sizeof_suffix_end)
parse_base_type(sizeof_base_type)
if *1p != SYMBOL_RPAREN goto bad_expression ; e.g. sizeof(int ,)
p += 16
if p != tokens_end goto stuff_after_sizeof_type
*8out = type_sizeof(a)
goto parse_sizeof_finish
:stuff_after_sizeof_type
token_error(sizeof_suffix_end, .str_stuff_after_sizeof_type)
:str_stuff_after_sizeof_type
string Unrecognized stuff after sizeof(T).
byte 0
:parse_sizeof_expr
; it's an expression, e.g. sizeof(x+3)
local temp
@ -2575,15 +2582,20 @@ function operator_precedence
:figre_out_rparen_arity
; Given that the token before this one is a right parenthesis, figure out whether
; this is a unary or a binary operator. This is (annoyingly) necessary because:
; (int)-x; /* cast processed first */
; (y)-x; /* subtraction processed first */
; (int)-x; /* cast processed first */
; sizeof(int)-x; /* subtraction processed first */
local p
p = token - 16
token_reverse_to_matching_lparen(&p)
p += 16
b = token_is_type(p)
if b != 0 goto operator_precedence_unary ; e.g. (int)-x;
if b != 0 goto rparen_might_be_cast
goto operator_precedence_binary ; e.g. (y)-x;
:rparen_might_be_cast
p -= 32
if *1p != KEYWORD_SIZEOF goto operator_precedence_unary ; e.g. (int)-x
goto operator_precedence_binary ; e.g. sizeof(int)-x
function unary_op_to_expression_type
argument op