fix a bunch of problems from parsing test

- ll suffixes (15ll => 15l)
- newlines between string literals to be concatenated
- fix *x++ = 1;
- default
- printing conditionals & decaying conditional operands
This commit is contained in:
pommicket 2022-02-07 17:02:57 -05:00
parent 16bad1636d
commit aba654c36d
4 changed files with 91 additions and 46 deletions

View file

@ -11,7 +11,8 @@
#define EXECUTABLE_SIZE 0x1000000 #define EXECUTABLE_SIZE 0x1000000
; "* 15 nesting levels of compound statements, iteration control structures, and selection control structures" C89 § 2.2.4.1 ; "* 15 nesting levels of compound statements, iteration control structures, and selection control structures" C89 § 2.2.4.1
#define BLOCK_DEPTH_LIMIT 16 ; we need a little more because people don't always write Standard code
#define BLOCK_DEPTH_LIMIT 32
; C OPERATOR PRECEDENCE ; C OPERATOR PRECEDENCE
; lowest ; lowest
@ -278,6 +279,7 @@
; - STATEMENT_BREAK - data1,2,3,4 are unused ; - STATEMENT_BREAK - data1,2,3,4 are unused
; - STATEMENT_RETURN - data1 is a pointer to the expression, or 0 if there is none; data2,3,4 are unused ; - STATEMENT_RETURN - data1 is a pointer to the expression, or 0 if there is none; data2,3,4 are unused
; - STATEMENT_CASE - data1 is the value; data2,3,4 are unused ; - STATEMENT_CASE - data1 is the value; data2,3,4 are unused
; - STATEMENT_DEFAULT - data1,2,3,4 are unused
; - STATEMENT_NOOP - data1,2,3,4 are unused ; - STATEMENT_NOOP - data1,2,3,4 are unused
#define STATEMENT_EXPRESSION 1 #define STATEMENT_EXPRESSION 1
#define STATEMENT_LOCAL_DECLARATION 2 #define STATEMENT_LOCAL_DECLARATION 2
@ -293,7 +295,8 @@
#define STATEMENT_BREAK 0xc #define STATEMENT_BREAK 0xc
#define STATEMENT_RETURN 0xd #define STATEMENT_RETURN 0xd
#define STATEMENT_CASE 0xe #define STATEMENT_CASE 0xe
#define STATEMENT_NOOP 0xf #define STATEMENT_DEFAULT 0xf
#define STATEMENT_NOOP 0x10
:keyword_table :keyword_table
@ -705,6 +708,9 @@
:str_endif :str_endif
string endif string endif
byte 0 byte 0
:str_defined
string defined
byte 0
:str___FILE__ :str___FILE__
string __FILE__ string __FILE__
byte 0 byte 0
@ -771,6 +777,6 @@
:str_case :str_case
string case string case
byte 0 byte 0
:str_defined :str_default
string defined string default
byte 0 byte 0

View file

@ -1,11 +1,4 @@
#define funciton( h, \ #include "tests/parse_stb_sprintf.h"
i, j ) h##ello * 2 * i##ello * j##ello /* @TODO: why are line numbers off by 1? */
int hello = 7;
int main() { int main() {
int x = funciton(
h,
h,
h
);
int y = funciton(h,h,h);
} }

View file

@ -382,23 +382,6 @@ function parse_statement
out = *8p_out out = *8p_out
token = *8p_token token = *8p_token
:stmt_label_loop
if *1token != TOKEN_IDENTIFIER goto stmt_label_loop_end
; if second token in statement is a colon, this must be a label
p = token + 16
if *1p == SYMBOL_COLON goto stmt_label
goto stmt_label_loop_end
:stmt_label
write_statement_header(out, STATEMENT_LABEL, token)
out += 8
token += 8
*8out = *8token ; copy label name
out += 32
token += 24 ; skip ident name, and colon
goto stmt_label_loop
:stmt_label_loop_end
c = *1token c = *1token
if c == SYMBOL_SEMICOLON goto stmt_empty if c == SYMBOL_SEMICOLON goto stmt_empty
if c == SYMBOL_LBRACE goto stmt_block if c == SYMBOL_LBRACE goto stmt_block
@ -407,6 +390,7 @@ function parse_statement
if c == KEYWORD_RETURN goto stmt_return if c == KEYWORD_RETURN goto stmt_return
if c == KEYWORD_GOTO goto stmt_goto if c == KEYWORD_GOTO goto stmt_goto
if c == KEYWORD_CASE goto stmt_case if c == KEYWORD_CASE goto stmt_case
if c == KEYWORD_DEFAULT goto stmt_default
if c == KEYWORD_STATIC goto stmt_static_declaration if c == KEYWORD_STATIC goto stmt_static_declaration
if c == KEYWORD_EXTERN goto stmt_extern_declaration if c == KEYWORD_EXTERN goto stmt_extern_declaration
if c == KEYWORD_WHILE goto stmt_while if c == KEYWORD_WHILE goto stmt_while
@ -415,6 +399,12 @@ function parse_statement
if c == KEYWORD_SWITCH goto stmt_switch if c == KEYWORD_SWITCH goto stmt_switch
if c == KEYWORD_IF goto stmt_if if c == KEYWORD_IF goto stmt_if
if *1token != TOKEN_IDENTIFIER goto stmt_not_label
; if second token in statement is a colon, this must be a label
p = token + 16
if *1p == SYMBOL_COLON goto stmt_label
:stmt_not_label
b = token_is_type(token) b = token_is_type(token)
if b != 0 goto stmt_local_declaration if b != 0 goto stmt_local_declaration
@ -438,6 +428,14 @@ function parse_statement
; @NONSTANDARD ; @NONSTANDARD
string Local extern declarations are not supported. string Local extern declarations are not supported.
byte 0 byte 0
:stmt_label
write_statement_header(out, STATEMENT_LABEL, token)
out += 8
token += 8
*8out = *8token ; copy label name
out += 32
token += 24 ; skip ident name, and colon
goto parse_statement_ret
:stmt_switch :stmt_switch
write_statement_header(out, STATEMENT_SWITCH, token) write_statement_header(out, STATEMENT_SWITCH, token)
token += 16 token += 16
@ -780,6 +778,19 @@ function parse_statement
token_error(token, .str_case_no_colon) token_error(token, .str_case_no_colon)
:str_case_no_colon :str_case_no_colon
string No : after case. string No : after case.
byte 0
:stmt_default
write_statement_header(out, STATEMENT_DEFAULT, token)
token += 16
out += 40
if *1token != SYMBOL_COLON goto default_no_colon
token += 16
goto parse_statement_ret
:default_no_colon
token_error(token, .str_default_no_colon)
:str_default_no_colon
string No : after default.
byte 0
:stmt_return :stmt_return
write_statement_header(out, STATEMENT_RETURN, token) write_statement_header(out, STATEMENT_RETURN, token)
out += 8 out += 8
@ -916,6 +927,7 @@ function print_statement_with_depth
if c == STATEMENT_GOTO goto print_stmt_goto if c == STATEMENT_GOTO goto print_stmt_goto
if c == STATEMENT_LABEL goto print_stmt_label if c == STATEMENT_LABEL goto print_stmt_label
if c == STATEMENT_CASE goto print_stmt_case if c == STATEMENT_CASE goto print_stmt_case
if c == STATEMENT_DEFAULT goto print_stmt_default
if c == STATEMENT_WHILE goto print_stmt_while if c == STATEMENT_WHILE goto print_stmt_while
if c == STATEMENT_DO goto print_stmt_do if c == STATEMENT_DO goto print_stmt_do
if c == STATEMENT_IF goto print_stmt_if if c == STATEMENT_IF goto print_stmt_if
@ -1089,7 +1101,10 @@ function print_statement_with_depth
putn_signed(dat1) putn_signed(dat1)
putcln(':) putcln(':)
return return
:print_stmt_default
puts(.str_default)
putcln(':)
return
; parse a global variable's initializer ; parse a global variable's initializer
; e.g. int x[5] = {1+8, 2, 3, 4, 5}; ; e.g. int x[5] = {1+8, 2, 3, 4, 5};
; advances *p_token to the token right after the initializer ; advances *p_token to the token right after the initializer
@ -2399,21 +2414,17 @@ function parse_expression
; look for the operator with the lowest precedence not in brackets ; look for the operator with the lowest precedence not in brackets
depth = 0 ; paren/square bracket depth depth = 0 ; paren/square bracket depth
first_token = 1
p = tokens p = tokens
best = 0 best = 0
best_precedence = 1000 best_precedence = 1000
goto expr_find_operator_loop_first
:expr_find_operator_loop :expr_find_operator_loop
first_token = 0
:expr_find_operator_loop_first
if p >= tokens_end goto expr_find_operator_loop_end if p >= tokens_end goto expr_find_operator_loop_end
n = p n = p
c = *1p c = *1p
p += 16 p += 16
if depth > 0 goto expr_findop_not_new_best if depth > 0 goto expr_findop_not_new_best
if depth < 0 goto expr_too_many_closing_brackets if depth < 0 goto expr_too_many_closing_brackets
a = operator_precedence(n, first_token) a = operator_precedence(n, tokens)
n = a n = a
if a == 0xe0 goto select_leftmost ; ensure that the leftmost unary operator is processed first if a == 0xe0 goto select_leftmost ; ensure that the leftmost unary operator is processed first
b = operator_right_associative(c) b = operator_right_associative(c)
@ -2668,8 +2679,8 @@ function parse_expression
if c == EXPRESSION_LOGICAL_NOT goto unary_type_logical_not if c == EXPRESSION_LOGICAL_NOT goto unary_type_logical_not
if c == EXPRESSION_ADDRESS_OF goto unary_address_of if c == EXPRESSION_ADDRESS_OF goto unary_address_of
if c == EXPRESSION_DEREFERENCE goto unary_dereference if c == EXPRESSION_DEREFERENCE goto unary_dereference
if c == EXPRESSION_PRE_INCREMENT goto unary_type_arithmetic_nopromote if c == EXPRESSION_PRE_INCREMENT goto unary_type_scalar_nopromote
if c == EXPRESSION_PRE_DECREMENT goto unary_type_arithmetic_nopromote if c == EXPRESSION_PRE_DECREMENT goto unary_type_scalar_nopromote
fputs(2, .str_unop_this_shouldnt_happen) fputs(2, .str_unop_this_shouldnt_happen)
exit(1) exit(1)
:str_unop_this_shouldnt_happen :str_unop_this_shouldnt_happen
@ -2730,8 +2741,8 @@ function parse_expression
if *1p > TYPE_DOUBLE goto unary_bad_type if *1p > TYPE_DOUBLE goto unary_bad_type
*4type = type_promotion(*4a) *4type = type_promotion(*4a)
return out return out
:unary_type_arithmetic_nopromote :unary_type_scalar_nopromote
if *1p > TYPE_DOUBLE goto unary_bad_type if *1p > TYPE_POINTER goto unary_bad_type
*4type = *4a *4type = *4a
return out return out
:unary_bad_type :unary_bad_type
@ -2863,13 +2874,17 @@ function parse_expression
; okay, q now points to the : ; okay, q now points to the :
*1out = EXPRESSION_CONDITIONAL *1out = EXPRESSION_CONDITIONAL
out += 8 out += 8
a = out + 4
out = parse_expression(tokens, best, out) out = parse_expression(tokens, best, out)
type_decay_array_to_pointer(*4a)
a = out + 4 ; type of left branch of conditional a = out + 4 ; type of left branch of conditional
best += 16 best += 16
out = parse_expression(best, p, out) out = parse_expression(best, p, out)
type_decay_array_to_pointer(*4a)
b = out + 4 ; type of right branch of conditional b = out + 4 ; type of right branch of conditional
p += 16 p += 16
out = parse_expression(p, tokens_end, out) out = parse_expression(p, tokens_end, out)
type_decay_array_to_pointer(*4b)
p = types + *4a p = types + *4a
if *1p == TYPE_STRUCT goto parse_cond_ltype if *1p == TYPE_STRUCT goto parse_cond_ltype
if *1p == TYPE_VOID goto parse_cond_ltype if *1p == TYPE_VOID goto parse_cond_ltype
@ -3630,17 +3645,21 @@ function type_promotion
; return precedence of given operator token, or 0xffff if not an operator ; return precedence of given operator token, or 0xffff if not an operator
function operator_precedence function operator_precedence
argument token argument token
argument is_first argument first_token
local p_op
local op local op
local b local b
if is_first != 0 goto operator_precedence_unary if token == first_token goto operator_precedence_unary
; if an operator is preceded by another, it must be a unary operator, e.g. ; if an operator is preceded by another, it must be a unary operator, e.g.
; in 5 + *x, * is a unary operator ; in `5 + *x`, * is a unary operator
op = token - 16 p_op = token - 16
op = *1op :figure_out_arity
op = *1p_op
if op == SYMBOL_RPAREN goto figre_out_rparen_arity if op == SYMBOL_RPAREN goto figre_out_rparen_arity
if op == SYMBOL_PLUS_PLUS goto figure_out_bimodal_arity
if op == SYMBOL_MINUS_MINUS goto figure_out_bimodal_arity
op = is_operator(op) op = is_operator(op)
; if an operator is immediately followed by another (including lparen), the second must be ; if an operator is immediately followed by another (including lparen), the second must be
@ -3719,6 +3738,14 @@ function operator_precedence
if b == 0 goto return_0xffff if b == 0 goto return_0xffff
goto return_0xe0 ; it's a cast goto return_0xe0 ; it's a cast
:figure_out_bimodal_arity
; ++ and -- can act either as prefix or postfix operators, which determines whether the operator after them is unary or binary.
if p_op == first_token goto operator_precedence_unary ; e.g. ++*x
; reverse one further to figure out which it is.
p_op -= 16
goto figure_out_arity
:figre_out_rparen_arity :figre_out_rparen_arity
; given that the token before this one is a right-parenthesis, figure out if ; given that the token before this one is a right-parenthesis, figure out if
; this is a unary or binary operator. this is (annoyingly) necessary, because: ; this is a unary or binary operator. this is (annoyingly) necessary, because:
@ -3941,6 +3968,7 @@ function print_expression
if c == EXPRESSION_LOGICAL_NOT goto print_logical_not if c == EXPRESSION_LOGICAL_NOT goto print_logical_not
if c == EXPRESSION_CAST goto print_cast if c == EXPRESSION_CAST goto print_cast
if c == EXPRESSION_CALL goto print_call if c == EXPRESSION_CALL goto print_call
if c == EXPRESSION_CONDITIONAL goto print_conditional
b = binop_expression_type_to_symbol(c) b = binop_expression_type_to_symbol(c)
if b != 0 goto print_expr_binop if b != 0 goto print_expr_binop
@ -4000,6 +4028,20 @@ function print_expression
expression = print_expression(expression) ; 2nd operand expression = print_expression(expression) ; 2nd operand
putc(41) putc(41)
return expression return expression
:print_conditional
putc(40)
expression += 8
expression = print_expression(expression)
putc(32)
putc('?)
putc(32)
expression = print_expression(expression)
putc(32)
putc(':)
putc(32)
expression = print_expression(expression)
putc(41)
return expression
:print_expr_dot :print_expr_dot
putc(40) putc(40)
expression += 8 expression += 8

View file

@ -261,7 +261,7 @@ function tokenize
goto string_literal_char_loop goto string_literal_char_loop
:string_literal_char_loop_end :string_literal_char_loop_end
pptoken_skip(&in) ; skip closing " pptoken_skip(&in) ; skip closing "
pptoken_skip_spaces(&in) pptoken_skip_whitespace(&in, &line_number)
if *1in == '" goto string_literal_loop ; string concatenation, e.g. "Hello, " "world!" if *1in == '" goto string_literal_loop ; string concatenation, e.g. "Hello, " "world!"
*1p = 0 ; null terminator *1p = 0 ; null terminator
p += 1 p += 1
@ -560,12 +560,16 @@ function read_number_suffix
c = *1s c = *1s
if c == 'u goto number_suffix_ul if c == 'u goto number_suffix_ul
if c == 'U goto number_suffix_ul if c == 'U goto number_suffix_ul
if c == 'l goto number_suffix_l ; handle ll suffix (even though it's C99)
if c == 'L goto number_suffix_l
if c != 0 goto bad_number_suffix if c != 0 goto bad_number_suffix
suffix = NUMBER_SUFFIX_L suffix = NUMBER_SUFFIX_L
goto number_suffix_return goto number_suffix_return
:number_suffix_ul :number_suffix_ul
s += 1 s += 1
c = *1s c = *1s
if c == 'l goto number_suffix_l ; handle ll suffix (even though it's C99)
if c == 'L goto number_suffix_l
if c != 0 goto bad_number_suffix if c != 0 goto bad_number_suffix
suffix = NUMBER_SUFFIX_UL suffix = NUMBER_SUFFIX_UL
goto number_suffix_return goto number_suffix_return