more parsing tests, more fixes

- #elif now actually works properly (hopefully)
- fixed some array decaying bugs (hopefully), and generally simplified typing
This commit is contained in:
pommicket 2022-02-08 20:09:08 -05:00
parent 26fccf7cd7
commit 0a2d05bdd5
5 changed files with 7694 additions and 69 deletions

View file

@ -1,10 +1,3 @@
; @TODO: if we have,
; 1 extern int blah;
; 2 ...
; n int blah;
; give `blah` an address on line 1, then ignore declaration on line n
; add 24 + 16 = 40 to the stack pointer to put argc, argv in the right place ; add 24 + 16 = 40 to the stack pointer to put argc, argv in the right place
byte 0x48 byte 0x48
byte 0x81 byte 0x81
@ -289,9 +282,16 @@ function main
close(output_fd) close(output_fd)
ident_list_printx64(global_variables) ident_list_printx64(global_variables)
puts(.str_types_bytes_used)
putnln(types_bytes_used)
exit(0) exit(0)
:str_types_bytes_used
string types_bytes_used:
byte 32
byte 0
:mmap_output_fd_failed :mmap_output_fd_failed
fputs(2, .str_mmap_output_fd_failed) fputs(2, .str_mmap_output_fd_failed)
exit(1) exit(1)

View file

@ -1,10 +1,17 @@
#include "tests/parse_stb_sprintf.h" #include "tests/parse_stb_image.h"
/* #if 0 */ /* #if 0 */
/* #elif 1 */ /* #elif 1 */
/* int f(){} */ /* int f(){} */
/* #endif */ /* #endif */
/* @TODO: some more testing of #if/#elif/#else/#endif */
/* @TODO: ensure that
struct Something {
int a;
} x,y;
works
*/
/* @TODO: why are line numbers off by 1? */
int main() { int main() {
} }

View file

@ -175,7 +175,20 @@ function parse_toplevel_declaration
goto parse_tld_ret goto parse_tld_ret
:tl_decl_no_ident :tl_decl_no_ident
token_error(prefix_end, .str_tl_decl_no_ident) ; this might actually be okay, e.g.
; struct Something { int x, y; }
if *1base_type == KEYWORD_STRUCT goto tldni_basetype_ok
if *1base_type == KEYWORD_UNION goto tldni_basetype_ok
if *1base_type == KEYWORD_ENUM goto tldni_basetype_ok
goto tldni_bad
:tldni_basetype_ok
if prefix != prefix_end goto tldni_bad ; e.g. struct Something {...} *;
if *1prefix_end != SYMBOL_SEMICOLON goto tldni_bad ; you can't do struct Something { ...}, struct SomethingElse {...};
parse_base_type(base_type) ; this will properly define the struct/union/enum and any enumerators
token = prefix_end
goto tl_decl_loop_done
:tldni_bad
token_error(prefix_end, .str_tl_decl_no_ident)
:str_tl_decl_no_ident :str_tl_decl_no_ident
string No identifier in top-level declaration. string No identifier in top-level declaration.
byte 0 byte 0
@ -413,7 +426,9 @@ function parse_statement
out += 8 out += 8
p = token_next_semicolon_not_in_brackets(token) p = token_next_semicolon_not_in_brackets(token)
*8out = expressions_end *8out = expressions_end
b = expressions_end + 4 ; type of expression
expressions_end = parse_expression(token, p, expressions_end) expressions_end = parse_expression(token, p, expressions_end)
type_decay_array_to_pointer_in_place(*4b)
out += 32 out += 32
token = p + 16 token = p + 16
goto parse_statement_ret goto parse_statement_ret
@ -689,7 +704,7 @@ function parse_statement
out -= 24 out -= 24
expressions_end = parse_expression(token, n, p) expressions_end = parse_expression(token, n, p)
p += 4 p += 4
type_decay_array_to_pointer(*4p) ; fix typing for `int[] x = {5,6}; int *y = x;` type_decay_array_to_pointer_in_place(*4p) ; fix typing for `int[] x = {5,6}; int *y = x;`
token = n token = n
goto local_decl_continue goto local_decl_continue
:local_init_lbrace :local_init_lbrace
@ -711,14 +726,15 @@ function parse_statement
:str_local_redeclaration :str_local_redeclaration
string Redeclaration of local variable. string Redeclaration of local variable.
byte 0 byte 0
:local_decl_no_ident
:local_decl_no_ident_bad
token_error(token, .str_local_decl_no_ident)
:str_local_decl_no_ident
string No identifier in declaration.
byte 0
:local_decl_loop_end :local_decl_loop_end
token += 16 ; skip semicolon token += 16 ; skip semicolon
goto parse_statement_ret goto parse_statement_ret
:local_decl_no_ident
token_error(token, .str_local_decl_no_ident)
:str_local_decl_no_ident
string No identifier in declaration.
byte 0
:stmt_static_declaration :stmt_static_declaration
p = block_static_variables p = block_static_variables
p += block_depth < 3 p += block_depth < 3
@ -2344,6 +2360,13 @@ function type_copy_ids
memcpy(dest, src, n) memcpy(dest, src, n)
return n return n
; create a copy of `type` at the current end of the used type data
; (types_bytes_used), advance types_bytes_used past the copy, and
; return where the copy starts.
; NOTE(review): `type` and the return value look like type IDs (offsets
; relative to `types`) rather than pointers -- callers store the result
; with `*4type = type_create_copy(b)` -- confirm against type_copy_ids.
function type_create_copy
argument type
local copy
; remember where the copy is going to start (the value of types_bytes_used before copying)
copy = types_bytes_used
; type_copy_ids returns the number of bytes it copied; bump types_bytes_used by that amount
types_bytes_used += type_copy_ids(types_bytes_used, type)
return copy
function type_create_pointer function type_create_pointer
argument type argument type
local id local id
@ -2479,15 +2502,17 @@ function parse_expression
if c == EXPRESSION_ARROW goto parse_expr_member if c == EXPRESSION_ARROW goto parse_expr_member
a = out + 4 ; type of first operand a = out + 4 ; type of first operand
out = parse_expression(tokens, best, out) ; first operand out = parse_expression(tokens, best, out) ; first operand
a = *4a
p = best + 16 p = best + 16
if c == EXPRESSION_CALL goto parse_call if c == EXPRESSION_CALL goto parse_call
b = out + 4 ; type of second operand
if c != EXPRESSION_SUBSCRIPT goto binary_not_subscript if c != EXPRESSION_SUBSCRIPT goto binary_not_subscript
tokens_end -= 16 tokens_end -= 16
if *1tokens_end != SYMBOL_RSQUARE goto unrecognized_expression if *1tokens_end != SYMBOL_RSQUARE goto unrecognized_expression
:binary_not_subscript :binary_not_subscript
b = out + 4 ; type of second operand
out = parse_expression(p, tokens_end, out) ; second operand out = parse_expression(p, tokens_end, out) ; second operand
b = *4b
if c == EXPRESSION_LSHIFT goto type_shift if c == EXPRESSION_LSHIFT goto type_shift
if c == EXPRESSION_RSHIFT goto type_shift if c == EXPRESSION_RSHIFT goto type_shift
@ -2530,68 +2555,72 @@ function parse_expression
byte 0 byte 0
:type_plus :type_plus
type_decay_array_to_pointer(*4a) type_decay_array_to_pointer_in_place(a)
type_decay_array_to_pointer(*4b) type_decay_array_to_pointer_in_place(b)
p = types + *4a p = types + a
if *1p == TYPE_POINTER goto type_binary_left ; pointer plus integer if *1p == TYPE_POINTER goto type_binary_left ; pointer plus integer
p = types + *4b p = types + b
if *1p == TYPE_POINTER goto type_binary_right ; integer plus pointer if *1p == TYPE_POINTER goto type_binary_right ; integer plus pointer
goto type_binary_usual goto type_binary_usual
:type_minus :type_minus
type_decay_array_to_pointer(*4a) type_decay_array_to_pointer_in_place(a)
type_decay_array_to_pointer(*4b) type_decay_array_to_pointer_in_place(b)
p = types + *4a p = types + a
if *1p == TYPE_POINTER goto type_minus_left_ptr if *1p == TYPE_POINTER goto type_minus_left_ptr
goto type_binary_usual goto type_binary_usual
:type_minus_left_ptr :type_minus_left_ptr
p = types + *4b p = types + b
if *1p == TYPE_POINTER goto type_long ; pointer difference if *1p == TYPE_POINTER goto type_long ; pointer difference
goto type_binary_left ; pointer minus integer goto type_binary_left ; pointer minus integer
:type_subscript :type_subscript
type_decay_array_to_pointer(*4a) type_decay_array_to_pointer_in_place(a)
p = types + *4a p = types + b
if *1p > TYPE_UNSIGNED_LONG goto subscript_non_integer
p = types + a
if *1p == TYPE_POINTER goto type_subscript_pointer if *1p == TYPE_POINTER goto type_subscript_pointer
if *1p == TYPE_ARRAY goto type_subscript_array
goto subscript_bad_type goto subscript_bad_type
:type_subscript_pointer :type_subscript_pointer
*4type = *4a + 1 b = a + 1
return out *4type = type_create_copy(b)
:type_subscript_array
*4type = *4a + 9
return out return out
:subscript_bad_type :subscript_bad_type
token_error(tokens, .str_subscript_bad_type) token_error(tokens, .str_subscript_bad_type)
:str_subscript_bad_type :str_subscript_bad_type
string Subscript of non-pointer type. string Subscript of non-pointer type.
byte 0 byte 0
:subscript_non_integer
token_error(tokens, .str_subscript_non_integer)
:str_subscript_non_integer
string Subscript index is not an integer.
byte 0
; apply the "usual conversions" ; apply the "usual conversions"
:type_binary_usual :type_binary_usual
*4type = expr_binary_type_usual_conversions(tokens, *4a, *4b) *4type = expr_binary_type_usual_conversions(tokens, a, b)
return out return out
; like type_binary_usual, but the operands must be integers ; like type_binary_usual, but the operands must be integers
:type_binary_usual_integer :type_binary_usual_integer
*4type = expr_binary_type_usual_conversions(tokens, *4a, *4b) *4type = expr_binary_type_usual_conversions(tokens, a, b)
p = types + *4type p = types + *4type
if *1p >= TYPE_FLOAT goto expr_binary_bad_types if *1p >= TYPE_FLOAT goto expr_binary_bad_types
return out return out
:type_binary_left_integer :type_binary_left_integer
p = types + *4a p = types + a
if *1p >= TYPE_FLOAT goto expr_binary_bad_types if *1p >= TYPE_FLOAT goto expr_binary_bad_types
p = types + *4b p = types + b
if *1p >= TYPE_FLOAT goto expr_binary_bad_types if *1p >= TYPE_FLOAT goto expr_binary_bad_types
goto type_binary_left goto type_binary_left
:type_binary_left :type_binary_left
*4type = *4a *4type = a
return out return out
:type_binary_right :type_binary_right
*4type = *4b *4type = b
return out return out
:type_shift :type_shift
p = types + *4a p = types + a
if *1p >= TYPE_FLOAT goto expr_binary_bad_types if *1p >= TYPE_FLOAT goto expr_binary_bad_types
p = types + *4b p = types + b
if *1p >= TYPE_FLOAT goto expr_binary_bad_types if *1p >= TYPE_FLOAT goto expr_binary_bad_types
*4type = type_promotion(*4a) *4type = type_promotion(a)
return out return out
; the type here is just int ; the type here is just int
:type_int :type_int
@ -2601,13 +2630,13 @@ function parse_expression
*4type = TYPE_LONG *4type = TYPE_LONG
return out return out
:expr_binary_bad_types :expr_binary_bad_types
bad_types_to_operator(tokens, *4a, *4b) bad_types_to_operator(tokens, a, b)
:parse_call :parse_call
local arg_type local arg_type
local param_type local param_type
; type call ; type call
b = types + *4a b = types + a
if *1b == TYPE_FUNCTION goto type_call_cont if *1b == TYPE_FUNCTION goto type_call_cont
if *1b != TYPE_POINTER goto calling_nonfunction if *1b != TYPE_POINTER goto calling_nonfunction
b += 1 ; handle calling function pointer b += 1 ; handle calling function pointer
@ -2632,7 +2661,7 @@ function parse_expression
goto call_arg_type_cont goto call_arg_type_cont
:arg_is_varargs :arg_is_varargs
type_promote_float_to_double(*4arg_type) type_promote_float_to_double(*4arg_type)
type_decay_array_to_pointer(*4arg_type) type_decay_array_to_pointer_in_place(*4arg_type)
:call_arg_type_cont :call_arg_type_cont
p = n p = n
@ -2672,7 +2701,7 @@ function parse_expression
a = out + 4 ; type of operand a = out + 4 ; type of operand
p = tokens + 16 p = tokens + 16
out = parse_expression(p, tokens_end, out) out = parse_expression(p, tokens_end, out)
p = types + *4a a = *4a
if c == EXPRESSION_BITWISE_NOT goto unary_type_integral if c == EXPRESSION_BITWISE_NOT goto unary_type_integral
if c == EXPRESSION_UNARY_PLUS goto unary_type_promote if c == EXPRESSION_UNARY_PLUS goto unary_type_promote
if c == EXPRESSION_UNARY_MINUS goto unary_type_promote if c == EXPRESSION_UNARY_MINUS goto unary_type_promote
@ -2718,37 +2747,43 @@ function parse_expression
string Bad cast. string Bad cast.
byte 0 byte 0
:unary_address_of :unary_address_of
*4type = type_create_pointer(*4a) *4type = type_create_pointer(a)
return out return out
:unary_dereference :unary_dereference
type_decay_array_to_pointer(*4a) type_decay_array_to_pointer_in_place(a)
p = types + a
if *2p == TYPE2_FUNCTION_POINTER goto type_deref_fpointer if *2p == TYPE2_FUNCTION_POINTER goto type_deref_fpointer
if *1p != TYPE_POINTER goto unary_bad_type if *1p != TYPE_POINTER goto unary_bad_type
*4type = *4a + 1 b = a + 1
*4type = type_create_copy(b)
return out return out
:type_deref_fpointer :type_deref_fpointer
*4type = *4a *4type = a
return out return out
:unary_type_logical_not :unary_type_logical_not
type_decay_array_to_pointer(*4a) type_decay_array_to_pointer_in_place(a)
p = types + a
if *1p > TYPE_POINTER goto unary_bad_type if *1p > TYPE_POINTER goto unary_bad_type
*4type = TYPE_INT *4type = TYPE_INT
return out return out
:unary_type_integral :unary_type_integral
p = types + a
if *1p >= TYPE_FLOAT goto unary_bad_type if *1p >= TYPE_FLOAT goto unary_bad_type
goto unary_type_promote goto unary_type_promote
:unary_type_promote :unary_type_promote
p = types + a
if *1p > TYPE_DOUBLE goto unary_bad_type if *1p > TYPE_DOUBLE goto unary_bad_type
*4type = type_promotion(*4a) *4type = type_promotion(a)
return out return out
:unary_type_scalar_nopromote :unary_type_scalar_nopromote
p = types + a
if *1p > TYPE_POINTER goto unary_bad_type if *1p > TYPE_POINTER goto unary_bad_type
*4type = *4a *4type = a
return out return out
:unary_bad_type :unary_bad_type
fprint_token_location(1, tokens) fprint_token_location(1, tokens)
puts(.str_unary_bad_type) puts(.str_unary_bad_type)
print_type(*4a) print_type(a)
putc(10) putc(10)
exit(1) exit(1)
:str_unary_bad_type :str_unary_bad_type
@ -2876,15 +2911,15 @@ function parse_expression
out += 8 out += 8
a = out + 4 a = out + 4
out = parse_expression(tokens, best, out) out = parse_expression(tokens, best, out)
type_decay_array_to_pointer(*4a) type_decay_array_to_pointer_in_place(*4a)
a = out + 4 ; type of left branch of conditional a = out + 4 ; type of left branch of conditional
best += 16 best += 16
out = parse_expression(best, p, out) out = parse_expression(best, p, out)
type_decay_array_to_pointer(*4a) type_decay_array_to_pointer_in_place(*4a)
b = out + 4 ; type of right branch of conditional b = out + 4 ; type of right branch of conditional
p += 16 p += 16
out = parse_expression(p, tokens_end, out) out = parse_expression(p, tokens_end, out)
type_decay_array_to_pointer(*4b) type_decay_array_to_pointer_in_place(*4b)
p = types + *4a p = types + *4a
if *1p == TYPE_STRUCT goto parse_cond_ltype if *1p == TYPE_STRUCT goto parse_cond_ltype
if *1p == TYPE_VOID goto parse_cond_ltype if *1p == TYPE_VOID goto parse_cond_ltype
@ -3095,7 +3130,7 @@ function parse_expression
; e.g. ; e.g.
; char s[] = "hello"; ; char s[] = "hello";
; char *t = s + 3; /* s "decays" into a pointer */ ; char *t = s + 3; /* s "decays" into a pointer */
function type_decay_array_to_pointer function type_decay_array_to_pointer_in_place
argument type argument type
local dest local dest
local src local src
@ -3108,6 +3143,7 @@ function type_decay_array_to_pointer
type_copy_ids(dest, src) type_copy_ids(dest, src)
return return
; change type to `double` if it's `float` ; change type to `double` if it's `float`
; in C, float arguments have to be passed as double for varargs ; in C, float arguments have to be passed as double for varargs
; there is also a rule that char/short/int are passed as ints, but we don't need to worry about it since we're passing everything as >=8 bytes. ; there is also a rule that char/short/int are passed as ints, but we don't need to worry about it since we're passing everything as >=8 bytes.
@ -4174,10 +4210,13 @@ function print_type
if c == TYPE_STRUCT goto print_type_struct if c == TYPE_STRUCT goto print_type_struct
if c == TYPE_FUNCTION goto print_type_function if c == TYPE_FUNCTION goto print_type_function
fputs(2, .str_bad_print_type) fputs(2, .str_bad_print_type)
putnln(type)
putnln(c)
putnln(types_bytes_used)
exit(1) exit(1)
:str_bad_print_type :str_bad_print_type
string Bad type passed to print_type. string Bad type passed to print_type:
byte 10 byte 32
byte 0 byte 0
:print_type_void :print_type_void
return puts(.str_void) return puts(.str_void)

View file

@ -869,7 +869,7 @@ function translation_phase_4
if p != 0 goto process_pptoken ; macro is defined; keep processing if p != 0 goto process_pptoken ; macro is defined; keep processing
p = look_up_function_macro(macro_name) p = look_up_function_macro(macro_name)
if p != 0 goto process_pptoken ; macro is defined; keep processing if p != 0 goto process_pptoken ; macro is defined; keep processing
preprocessor_skip_if(filename, &line_number, &in, &out) preprocessor_skip_if(filename, &line_number, &in, &out, 0)
goto phase4_line_noinc goto phase4_line_noinc
:pp_directive_ifndef :pp_directive_ifndef
pptoken_skip(&in) pptoken_skip(&in)
@ -884,12 +884,13 @@ function translation_phase_4
if p != 0 goto ifndef_skip ; macro is defined; skip if p != 0 goto ifndef_skip ; macro is defined; skip
goto process_pptoken ; macro not defined; keep processing goto process_pptoken ; macro not defined; keep processing
:ifndef_skip :ifndef_skip
preprocessor_skip_if(filename, &line_number, &in, &out) preprocessor_skip_if(filename, &line_number, &in, &out, 0)
goto phase4_line_noinc goto phase4_line_noinc
:pp_directive_else :pp_directive_else
; assume we got here from an if, so skip this ; assume we got here from an if, so skip this
pptoken_skip_to_newline(&in) pptoken_skip_to_newline(&in)
preprocessor_skip_if(filename, &line_number, &in, &out) ; this might actually be an elif, so skip all the way to #endif.
preprocessor_skip_if(filename, &line_number, &in, &out, 1)
goto phase4_line_noinc goto phase4_line_noinc
:pp_directive_endif :pp_directive_endif
; assume we got here from an if/elif/else, just ignore it. ; assume we got here from an if/elif/else, just ignore it.
@ -979,12 +980,13 @@ function translation_phase_4
:pp_if_idents0_done :pp_if_idents0_done
;print_tokens(if_tokens, p) ;print_tokens(if_tokens, p)
parse_expression(if_tokens, p, if_expr) parse_expression(if_tokens, p, if_expr)
;print_expression(if_expr) print_expression(if_expr)
putc(10)
evaluate_constant_expression(p, if_expr, &b) evaluate_constant_expression(p, if_expr, &b)
if b == 0 goto pp_directive_if0 if b == 0 goto pp_directive_if0
goto pp_if_done goto pp_if_done
:pp_directive_if0 :pp_directive_if0
preprocessor_skip_if(filename, &line_number, &in, &out) preprocessor_skip_if(filename, &line_number, &in, &out, 0)
goto pp_if_done goto pp_if_done
:pp_bad_defined :pp_bad_defined
token_error(p, .str_pp_bad_defined) token_error(p, .str_pp_bad_defined)
@ -1041,9 +1043,9 @@ function translation_phase_4
; skip body of #if / #elif / #else. This will advance *p_in to: ; skip body of #if / #elif / #else. This will advance *p_in to:
; - right at the next unmatched #elif, replacing it with a #if ; - right after the next #endif
; OR - right after the next #else ; OR if to_endif == 0 - right at the next unmatched #elif, replacing it with a #if
; OR - right after the next #endif ; OR if to_endif == 0 - right after the next #else
; whichever comes first ; whichever comes first
; @NONSTANDARD: this doesn't properly handle #endif's, etc. which appear in a different file from their corresponding #if's. ; @NONSTANDARD: this doesn't properly handle #endif's, etc. which appear in a different file from their corresponding #if's.
; NOTE: p_out is needed for newlines ; NOTE: p_out is needed for newlines
@ -1052,6 +1054,7 @@ function preprocessor_skip_if
argument p_line_number argument p_line_number
argument p_in argument p_in
argument p_out argument p_out
argument to_endif
local in local in
local out local out
local p local p
@ -1097,6 +1100,7 @@ function preprocessor_skip_if
goto preprocessor_skip_if_loop ; some unimportant directive goto preprocessor_skip_if_loop ; some unimportant directive
:skip_if_elif :skip_if_elif
if if_depth > 0 goto preprocessor_skip_if_loop if if_depth > 0 goto preprocessor_skip_if_loop
if to_endif != 0 goto preprocessor_skip_if_loop
; replace #elif with #if (kinda sketchy) ; replace #elif with #if (kinda sketchy)
*1in = '# *1in = '#
in += 1 in += 1
@ -1112,9 +1116,12 @@ function preprocessor_skip_if
goto preprocessor_skip_if_loop goto preprocessor_skip_if_loop
:skip_if_endif :skip_if_endif
if_depth -= 1 if_depth -= 1
; (fallthrough) pptoken_skip(&in) ; skip endif
if prev_if_depth > 0 goto preprocessor_skip_if_loop
goto preprocessor_skip_if_loop_end
:skip_if_else :skip_if_else
pptoken_skip(&in) ; skip endif/else pptoken_skip(&in) ; skip else
if to_endif != 0 goto preprocessor_skip_if_loop
if prev_if_depth > 0 goto preprocessor_skip_if_loop if prev_if_depth > 0 goto preprocessor_skip_if_loop
goto preprocessor_skip_if_loop_end goto preprocessor_skip_if_loop_end
:preprocessor_skip_if_loop_end :preprocessor_skip_if_loop_end

7572
05/tests/parse_stb_image.h Normal file

File diff suppressed because it is too large Load diff