lang-bootstrap/05/parse.b

2466 lines
67 KiB
Brainfuck
Raw Normal View History

2022-01-24 13:22:16 -05:00
; Does `token` begin a type?
; Jumps to the shared return_1 label when the token is one of the built-in
; type keywords, or an identifier that ident_list_lookup finds in `typedefs`;
; otherwise jumps to the shared return_0 label.
function token_is_type
argument token
local kind
local name
local found
kind = *1token
; built-in type keywords
if kind == KEYWORD_VOID goto return_1
if kind == KEYWORD_CHAR goto return_1
if kind == KEYWORD_SHORT goto return_1
if kind == KEYWORD_INT goto return_1
if kind == KEYWORD_LONG goto return_1
if kind == KEYWORD_UNSIGNED goto return_1
if kind == KEYWORD_FLOAT goto return_1
if kind == KEYWORD_DOUBLE goto return_1
if kind == KEYWORD_STRUCT goto return_1
if kind == KEYWORD_UNION goto return_1
if kind == KEYWORD_ENUM goto return_1
if kind != TOKEN_IDENTIFIER goto return_0
; an identifier only starts a type if it names a typedef;
; the value used for the lookup is the 8 bytes stored at offset 8 of the token
name = token + 8
name = *8name
found = ident_list_lookup(typedefs, name)
if found == 0 goto return_0
goto return_1
2022-01-18 16:29:48 -05:00
; Parse the token array starting at `tokens` (tokens are 16 bytes each) until TOKEN_EOF.
; In this revision only `typedef` declarations are handled; any other top-level
; token aborts via die() with "parse_tokens not implemented."
; For every declarator in a typedef, the encoded type is appended to `types`,
; printed (debug output), and registered in `typedefs` under the declared name,
; with the offset of the type in `types` as its value.
function parse_tokens
argument tokens
local token
local ident
local type
local base_type
local base_type_end
local prefix
local prefix_end
local suffix
local suffix_end
token = tokens
:parse_tokens_loop
if *1token == TOKEN_EOF goto parse_tokens_eof
if *1token == KEYWORD_TYPEDEF goto parse_typedef
die(.str_parse_tokens_ni)
:str_parse_tokens_ni
string parse_tokens not implemented.
byte 0
:parse_typedef
base_type = token + 16 ; skip the typedef keyword
base_type_end = type_get_base_end(base_type)
token = base_type_end
:typedef_loop
; one iteration per declarator, e.g. each of `a`, `*b`, `c[3]` in typedef int a, *b, c[3];
prefix = token
prefix_end = type_get_prefix_end(prefix)
if *1prefix_end != TOKEN_IDENTIFIER goto typedef_no_ident
; the identifier's name is stored at offset 8 of its token
ident = prefix_end + 8
ident = *8ident
suffix = prefix_end + 16
; the suffix end can only be found starting from the prefix
suffix_end = type_get_suffix_end(prefix)
;putc('B)
;putc(':)
;print_tokens(base_type, base_type_end)
;putc('P)
;putc(':)
;print_tokens(prefix, prefix_end)
;putc('S)
;putc(':)
;print_tokens(suffix, suffix_end)
; the new type starts at the current end of `types`;
; declarators are written first, then the base type
type = types_bytes_used
parse_type_declarators(prefix, prefix_end, suffix, suffix_end)
parse_base_type(base_type)
puts(.str_typedef)
putc(32)
print_type(type)
putc(10)
ident_list_add(typedefs, ident, type)
token = suffix_end
if *1token == SYMBOL_SEMICOLON goto typedef_loop_end
if *1token != SYMBOL_COMMA goto bad_typedef
token += 16 ; skip comma
goto typedef_loop
:typedef_loop_end
token += 16 ; skip semicolon
goto parse_tokens_loop
:typedef_no_ident
; report the error at the offending declarator, not at the first token of the input
token_error(token, .str_typedef_no_ident)
:str_typedef_no_ident
string No identifier in typedef declaration.
byte 0
:bad_typedef
; report the error at the unexpected token following the declarator
token_error(token, .str_bad_typedef)
:str_bad_typedef
string Bad typedef.
byte 0
:parse_tokens_eof
return
2022-01-19 11:57:42 -05:00
; On entry *p_token must point at a { token; on return it points at the } that closes it.
; Nested braces are tracked with a counter. Reaching TOKEN_EOF first is reported
; through token_error at the original { token.
function token_skip_to_matching_rbrace
argument p_token
local cur
local nesting
local kind
nesting = 0
cur = *8p_token
:skip_rbrace_loop
kind = *1cur
if kind == TOKEN_EOF goto skip_rbrace_eof
if kind == SYMBOL_RBRACE goto skip_rbrace_decdepth
if kind != SYMBOL_LBRACE goto skip_rbrace_next
; opening brace: one level deeper
nesting += 1
:skip_rbrace_next
cur += 16
goto skip_rbrace_loop
:skip_rbrace_decdepth
nesting -= 1
if nesting != 0 goto skip_rbrace_next
; back at the level of the initial {: this is the matching }
*8p_token = cur
return
:skip_rbrace_eof
token_error(*8p_token, .str_skip_rbrace_eof)
:str_skip_rbrace_eof
string Unmatched {
byte 0
; On entry *p_token must point at a [ token; on return it points at the ] that closes it.
; Nested square brackets are tracked with a counter. Reaching TOKEN_EOF first is
; reported through token_error at the original [ token.
function token_skip_to_matching_rsquare
argument p_token
local cur
local nesting
local kind
nesting = 0
cur = *8p_token
:skip_square_loop
kind = *1cur
if kind == TOKEN_EOF goto skip_square_eof
if kind == SYMBOL_RSQUARE goto skip_square_decdepth
if kind != SYMBOL_LSQUARE goto skip_square_next
; opening bracket: one level deeper
nesting += 1
:skip_square_next
cur += 16
goto skip_square_loop
:skip_square_decdepth
nesting -= 1
if nesting != 0 goto skip_square_next
; back at the level of the initial [: this is the matching ]
*8p_token = cur
return
:skip_square_eof
token_error(*8p_token, .str_skip_square_eof)
:str_skip_square_eof
string Unmatched [
byte 0
; On entry *p_token must point at a ) token; on return it points at the ( that opens it.
; The scan runs backwards (16 bytes per token) with no lower bound:
; THE MATCHING ( MUST EXIST, OTHERWISE THIS WILL WALK OFF THE START OF THE TOKENS.
function token_reverse_to_matching_lparen
argument p_token
local cur
local balance
local kind
cur = *8p_token
balance = 0
:reverse_paren_loop
kind = *1cur
if kind == SYMBOL_RPAREN goto reverse_paren_decdepth
if kind != SYMBOL_LPAREN goto reverse_paren_next
; scanning right-to-left, a ( cancels one previously seen )
balance += 1
if balance == 0 goto reverse_paren_ret
:reverse_paren_next
cur -= 16
goto reverse_paren_loop
:reverse_paren_decdepth
; the initial ) takes the balance to -1
balance -= 1
goto reverse_paren_next
:reverse_paren_ret
*8p_token = cur
return
2022-01-18 16:29:48 -05:00
; parse things like `int x` or `int f(void, int, char *)`
; advances *p_token
2022-01-26 14:54:23 -05:00
; returns type ID, or 0, in which case you should look at parse_type_result
2022-01-21 14:41:08 -05:00
; NOTE(review): the contract above is not implemented in this revision; the
; function declares no arguments and its whole body is the single byte 0xcc.
; Presumably that is a deliberate trap (0xcc is the x86 int3 opcode) so that
; any remaining caller faults immediately -- confirm before relying on it.
function parse_type
; split types into base (B), prefix (P) and suffix (S)
;   struct Thing (*things[5])(void), *something_else[3];
;   BBBBBBBBBBBB PP      SSSSSSSSSS  P              SSS
2022-01-19 11:57:42 -05:00
; Here, we call `struct Thing` the "base type".
; The pieces are located by type_get_base_end, type_get_prefix_end and type_get_suffix_end.
2022-01-26 16:40:11 -05:00
byte 0xcc
; Given the first token of a base type, return the token just past it.
;   - struct/union/enum: skips the keyword, an optional tag name, and an
;     optional { ... } body
;   - anything else: skips the first token unconditionally, then any run of
;     unsigned/char/short/int/long/double keywords
function type_get_base_end
argument token
local kind
kind = *1token
if kind == KEYWORD_ENUM goto skip_struct_union_enum
if kind == KEYWORD_UNION goto skip_struct_union_enum
if kind == KEYWORD_STRUCT goto skip_struct_union_enum
; The first pass through the increment below skips the leading token whatever
; it is; importantly, this skips the typedef'd name if there is one
; (e.g. typedef int Foo; Foo x;)
:skip_base_type_loop_cont
token += 16
kind = *1token
if kind == KEYWORD_INT goto skip_base_type_loop_cont ;e.g. unsigned int x;
if kind == KEYWORD_CHAR goto skip_base_type_loop_cont ;e.g. unsigned char x;
if kind == KEYWORD_SHORT goto skip_base_type_loop_cont ;e.g. unsigned short x;
if kind == KEYWORD_LONG goto skip_base_type_loop_cont ;e.g. unsigned long x;
if kind == KEYWORD_UNSIGNED goto skip_base_type_loop_cont ;e.g. int unsigned x;
if kind == KEYWORD_DOUBLE goto skip_base_type_loop_cont ;e.g. long double x;
return token

:skip_struct_union_enum
token += 16 ; skip the struct/union/enum keyword
if *1token != TOKEN_IDENTIFIER goto skip_sue_no_name
token += 16 ; struct *blah*
:skip_sue_no_name
if *1token == SYMBOL_LBRACE goto skip_sue_with_body
; no body, e.g. struct Something x[5];
return token
:skip_sue_with_body
; okay we have something like
;    struct {
;        int x, y;
;    } test;
token_skip_to_matching_rbrace(&token)
token += 16 ; skip the closing }
return token
; Given the first token of a type prefix, return the token just past it.
; The prefix is the leading run of * and ( tokens (arithmetic type keywords
; are also stepped over); the first token of any other kind -- an identifier,
; a [, a ), or anything else -- ends it.
function type_get_prefix_end
argument token
local kind
goto find_prefix_end_loop
:prefix_end_cont
token += 16
:find_prefix_end_loop
kind = *1token
if kind == SYMBOL_TIMES goto prefix_end_cont
if kind == SYMBOL_LPAREN goto prefix_end_cont
if kind == KEYWORD_UNSIGNED goto prefix_end_cont
if kind == KEYWORD_CHAR goto prefix_end_cont
if kind == KEYWORD_SHORT goto prefix_end_cont
if kind == KEYWORD_INT goto prefix_end_cont
if kind == KEYWORD_LONG goto prefix_end_cont
if kind == KEYWORD_FLOAT goto prefix_end_cont
if kind == KEYWORD_DOUBLE goto prefix_end_cont
; every other token kind terminates the prefix
return token
; Return the token just past the suffix of a declarator.
; NOTE: the argument is the start of the PREFIX, not of the suffix
; (in general the end of the suffix can't be found without the prefix,
; because parentheses opened in the prefix are closed in the suffix).
;   int (*x);
;           ^ suffix ends here
;   (int *)
;         ^ suffix ends here
; Reaching TOKEN_EOF inside brackets is reported through token_error.
function type_get_suffix_end
argument prefix
local nesting
local cur
local kind
nesting = 0 ; parenthesis/square bracket depth
cur = prefix
:suffix_end_loop
kind = *1cur
if kind == SYMBOL_LPAREN goto suffix_end_incdepth
if kind == SYMBOL_LSQUARE goto suffix_end_incdepth
if kind == SYMBOL_RPAREN goto suffix_end_decdepth
if kind == SYMBOL_RSQUARE goto suffix_end_decdepth
if kind == SYMBOL_TIMES goto suffix_end_cont
if kind == TOKEN_IDENTIFIER goto suffix_end_cont
; any other token at bracket depth 0 is the end of the suffix
if nesting == 0 goto suffix_end_found
if kind == TOKEN_EOF goto type_get_suffix_bad_type
; otherwise we are inside brackets: keep going
:suffix_end_cont
cur += 16
goto suffix_end_loop
:suffix_end_incdepth
nesting += 1
goto suffix_end_cont
:suffix_end_decdepth
nesting -= 1
; a closing bracket that was never opened here belongs to an enclosing construct
if nesting < 0 goto suffix_end_found
goto suffix_end_cont
:suffix_end_found
return cur
:type_get_suffix_bad_type
token_error(prefix, .str_bad_type_suffix)
:str_bad_type_suffix
string Bad type suffix.
byte 0
2022-01-26 16:40:11 -05:00
; Encode the declarator part of a type (everything except the base type).
; writes to *(types + types_bytes_used), and updates types_bytes_used
;   prefix .. prefix_end   the tokens before the declared name (* and ( tokens)
;   suffix .. suffix_end   the tokens after the declared name ([..], (..), ) tokens)
; The prefix is consumed from its end and the suffix from its start:
;   [size]   -> TYPE_ARRAY followed by the 8-byte size
;   (params) -> TYPE_FUNCTION, each parameter type, then a 0 byte
;   *        -> TYPE_POINTER
;   )        -> must pair with a ( at the end of the prefix; both are dropped
; Always returns 0; malformed input is reported through token_error.
function parse_type_declarators
argument prefix
argument prefix_end
argument suffix
argument suffix_end
local tok
local size_expr
local count
local dst
local saved_bytes_used
local arg_base
local arg_prefix
local arg_prefix_end
local arg_suffix
local arg_suffix_end

; main loop for parsing types
:type_declarators_loop
tok = prefix_end - 16 ; last remaining prefix token
; suffix operators are handled before the pointer in the prefix
if *1suffix == SYMBOL_LSQUARE goto parse_array_type
if *1suffix == SYMBOL_LPAREN goto parse_function_type
if *1tok == SYMBOL_TIMES goto parse_pointer_type
if suffix == suffix_end goto type_declarators_loop_end
if *1suffix == SYMBOL_RPAREN goto parse_type_remove_parentheses
goto parse_typedecls_bad_type

:parse_pointer_type
dst = types + types_bytes_used
*1dst = TYPE_POINTER
types_bytes_used += 1
prefix_end = tok ; drop the * from the prefix
goto type_declarators_loop

:parse_array_type
dst = types + types_bytes_used
*1dst = TYPE_ARRAY
types_bytes_used += 1
; little hack to avoid screwing up types like double[sizeof(int)]
; temporarily pretend we're using a lot more of types
saved_bytes_used = types_bytes_used
types_bytes_used += 4000
size_expr = malloc(4000)
tok = suffix
token_skip_to_matching_rsquare(&tok)
suffix += 16 ; skip [
parse_expression(suffix, tok, size_expr)
;print_expression(size_expr)
;putc(10)
evaluate_constant_expression(prefix, size_expr, &count)
if count < 0 goto bad_array_size
free(size_expr)
; undo the hack and store the size right after the TYPE_ARRAY byte
dst = types + saved_bytes_used
*8dst = count
types_bytes_used = saved_bytes_used + 8
suffix = tok + 16 ; continue after the ]
goto type_declarators_loop
:bad_array_size
token_error(suffix, .str_bad_array_size)
:str_bad_array_size
string Very large or negative array size.
byte 0

:parse_function_type
tok = suffix + 16 ; first token after the (
dst = types + types_bytes_used
*1dst = TYPE_FUNCTION
types_bytes_used += 1
:function_type_loop
; split one parameter into base type, prefix and suffix
arg_base = tok
arg_prefix = type_get_base_end(arg_base)
arg_prefix_end = type_get_prefix_end(arg_prefix)
arg_suffix = arg_prefix_end
; the parameter name is optional
if *1arg_suffix != TOKEN_IDENTIFIER goto functype_no_ident
arg_suffix += 16
:functype_no_ident
arg_suffix_end = type_get_suffix_end(arg_prefix)
parse_type_declarators(arg_prefix, arg_prefix_end, arg_suffix, arg_suffix_end)
parse_base_type(arg_base)
tok = arg_suffix_end
if *1tok == SYMBOL_RPAREN goto function_type_loop_end
if *1tok != SYMBOL_COMMA goto parse_typedecls_bad_type
tok += 16 ; skip comma
goto function_type_loop
:function_type_loop_end
; a 0 byte terminates the parameter list
dst = types + types_bytes_used
*1dst = 0
types_bytes_used += 1
suffix = tok + 16 ; continue after the )
goto type_declarators_loop

:parse_type_remove_parentheses
; a ) in the suffix must be matched by a ( at the end of the prefix
if *1tok != SYMBOL_LPAREN goto parse_typedecls_bad_type
prefix_end = tok
suffix += 16
goto type_declarators_loop

:type_declarators_loop_end
return 0
:parse_typedecls_bad_type
token_error(prefix, .str_bad_type_declarators)
:str_bad_type_declarators
string Bad type declarators.
byte 0
2022-01-26 16:40:11 -05:00
; Encode the base type whose first token is `base_type`.
; writes to *(types + types_bytes_used), and updates types_bytes_used
; (always returns 0; the value carries no information)
; Handles:
;   - typedef names: the typedef'd type is copied in
;   - struct/union: TYPE_STRUCT followed by an 8-byte value (the struct's
;     member list); a definition with { } also parses the members.
;     An undefined struct/union without a body is encoded as TYPE_VOID.
;   - enum: encoded as TYPE_INT; a definition also registers its enumerators
;   - float, void
;   - the other arithmetic types, in any keyword order
; Errors are reported through token_error.
function parse_base_type
argument base_type
local out
local flags
local p
local c
local depth
local is_struct
is_struct = 0
out = types + types_bytes_used
c = *1base_type
if c == TOKEN_IDENTIFIER goto base_type_typedef
if c == KEYWORD_STRUCT goto base_type_struct
if c == KEYWORD_UNION goto base_type_union
if c == KEYWORD_ENUM goto base_type_enum
if c == KEYWORD_FLOAT goto base_type_float
if c == KEYWORD_VOID goto base_type_void

; "normal" type like int, unsigned char, etc.
; annoyingly, all of these are equivalent to `unsigned long`:
;    unsigned long int
;    long unsigned int
;    int long unsigned
;    etc.
; so we represent these as PARSETYPE_FLAG_UNSIGNED|PARSETYPE_FLAG_LONG|PARSETYPE_FLAG_INT.
#define PARSETYPE_FLAG_UNSIGNED 1
#define PARSETYPE_FLAG_CHAR 2
#define PARSETYPE_FLAG_SHORT 4
#define PARSETYPE_FLAG_INT 8
#define PARSETYPE_FLAG_LONG 16
#define PARSETYPE_FLAG_DOUBLE 32
flags = 0
p = base_type
:base_type_normal_loop
; collect one flag per keyword; each keyword may appear at most once
c = *1p
p += 16
if c == KEYWORD_CHAR goto base_type_flag_char
if c == KEYWORD_SHORT goto base_type_flag_short
if c == KEYWORD_INT goto base_type_flag_int
if c == KEYWORD_LONG goto base_type_flag_long
if c == KEYWORD_UNSIGNED goto base_type_flag_unsigned
if c == KEYWORD_DOUBLE goto base_type_flag_double
goto base_type_normal_loop_end
:base_type_flag_char
c = flags & PARSETYPE_FLAG_CHAR
if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_CHAR
goto base_type_normal_loop
:base_type_flag_short
c = flags & PARSETYPE_FLAG_SHORT
if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_SHORT
goto base_type_normal_loop
:base_type_flag_int
c = flags & PARSETYPE_FLAG_INT
if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_INT
goto base_type_normal_loop
:base_type_flag_long
c = flags & PARSETYPE_FLAG_LONG
if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_LONG
goto base_type_normal_loop
:base_type_flag_unsigned
c = flags & PARSETYPE_FLAG_UNSIGNED
if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_UNSIGNED
goto base_type_normal_loop
:base_type_flag_double
c = flags & PARSETYPE_FLAG_DOUBLE
if c != 0 goto repeated_base_type
flags |= PARSETYPE_FLAG_DOUBLE
goto base_type_normal_loop
:repeated_base_type
token_error(p, .str_repeated_base_type)
:str_repeated_base_type
string Arithmetic type repeated (e.g. unsigned unsigned int).
byte 0
:base_type_normal_loop_end
; map each valid flag combination to a type; anything else is an error
if flags == 8 goto base_type_int ; `int`
if flags == 1 goto base_type_uint ; `unsigned`
if flags == 9 goto base_type_uint ; `unsigned int` etc.
if flags == 2 goto base_type_char ; `char`
if flags == 3 goto base_type_uchar ; `unsigned char` etc.
if flags == 4 goto base_type_short ; `short`
if flags == 12 goto base_type_short ; `short int` etc.
if flags == 5 goto base_type_ushort ; `unsigned short` etc.
if flags == 13 goto base_type_ushort ; `unsigned short int` etc.
if flags == 16 goto base_type_long ; `long`
if flags == 24 goto base_type_long ; `long int` etc.
if flags == 17 goto base_type_ulong ; `unsigned long` etc.
if flags == 25 goto base_type_ulong ; `unsigned long int` etc.
if flags == 32 goto base_type_double ; `double`
if flags == 48 goto base_type_double ; `long double` (we use the same type for double and long double)
goto bad_base_type

:base_type_char
*1out = TYPE_CHAR
out += 1
goto base_type_done
:base_type_uchar
*1out = TYPE_UNSIGNED_CHAR
out += 1
goto base_type_done
:base_type_short
*1out = TYPE_SHORT
out += 1
goto base_type_done
:base_type_ushort
*1out = TYPE_UNSIGNED_SHORT
out += 1
goto base_type_done
:base_type_int
*1out = TYPE_INT
out += 1
goto base_type_done
:base_type_uint
*1out = TYPE_UNSIGNED_INT
out += 1
goto base_type_done
:base_type_long
*1out = TYPE_LONG
out += 1
goto base_type_done
:base_type_ulong
*1out = TYPE_UNSIGNED_LONG
out += 1
goto base_type_done
:base_type_double
*1out = TYPE_DOUBLE
out += 1
goto base_type_done

:base_type_done
; common exit: commit everything written through `out`
types_bytes_used = out - types
return 0

:base_type_struct
is_struct = 1
; fallthrough
:base_type_union
local struct_name
local struct
struct_name = .empty_string
p = base_type + 16 ; skip the struct/union keyword
if *1p != TOKEN_IDENTIFIER goto base_type_have_name
p += 8
struct_name = *8p
p += 8
:base_type_have_name
; NOTE(review): unnamed structs/unions are all looked up (and later added)
; under the empty name, so a second unnamed definition may be reported as a
; redefinition -- depends on how ident_list_lookup treats empty names; confirm.
c = ident_list_lookup(structures, struct_name)
if *1p == SYMBOL_LBRACE goto base_type_struct_definition

if c == 0 goto base_type_incomplete_struct
; e.g. struct Foo x; where struct Foo has been defined
*1out = TYPE_STRUCT
out += 1
*8out = c
out += 8
goto base_type_done
:base_type_incomplete_struct
; e.g. struct Foo *x; where struct Foo hasn't been defined
*1out = TYPE_VOID
out += 1
goto base_type_done
:base_type_struct_definition
local member_base_type
local member_prefix
local member_prefix_end
local member_suffix
local member_suffix_end
local member_name
local member_type
local member_align
local member_size
if c != 0 goto struct_redefinition
struct = ident_list_create(8000) ; note: maximum "* 127 members in a single structure or union" C89 § 2.2.4.1
*1out = TYPE_STRUCT
out += 1
*8out = struct
out += 8
; commit now: parsing the members below appends more types
types_bytes_used = out - types
p += 16 ; skip opening {
local offset
offset = 0
; registered before the members are parsed
ident_list_add(structures, struct_name, struct)
:struct_defn_loop
if *1p == SYMBOL_RBRACE goto struct_defn_loop_end
member_base_type = p
p = type_get_base_end(member_base_type)
:struct_defn_decl_loop ; handle each element of int x, y[5], *z;
member_prefix = p
member_prefix_end = type_get_prefix_end(member_prefix)
if *1member_prefix_end != TOKEN_IDENTIFIER goto member_no_identifier
member_name = member_prefix_end + 8
member_name = *8member_name
c = ident_list_lookup_check(struct, member_name, 0)
if c == 1 goto duplicate_member
member_suffix = member_prefix_end + 16
member_suffix_end = type_get_suffix_end(member_prefix)
member_type = types_bytes_used
parse_type_declarators(member_prefix, member_prefix_end, member_suffix, member_suffix_end)
parse_base_type(member_base_type)

; make sure struct member is aligned
member_align = type_alignof(member_type)
; offset = ceil(offset / align) * align
offset += member_align - 1
offset /= member_align
offset *= member_align

if offset ] 0xffffffff goto struct_too_large
;putnln(offset)
; data = (type << 32) | offset
c = member_type < 32
c |= offset
ident_list_add(struct, member_name, c)

member_size = type_sizeof(member_type)
offset += member_size * is_struct ; keep offset as 0 if this is a union
p = member_suffix_end
if *1p == SYMBOL_SEMICOLON goto struct_defn_decl_loop_end
if *1p != SYMBOL_COMMA goto struct_bad_declaration
p += 16 ; skip comma
goto struct_defn_decl_loop
:duplicate_member
token_error(p, .str_duplicate_member)
:str_duplicate_member
string Duplicate member in struct/union.
byte 0
:struct_defn_decl_loop_end
p += 16 ; skip semicolon
goto struct_defn_loop
:struct_defn_loop_end
; re-sync out: the member types were appended after the struct's own bytes
out = types + types_bytes_used
goto base_type_done
:struct_redefinition
token_error(p, .str_struct_redefinition)
:str_struct_redefinition
string struct redefinition.
byte 0
:struct_bad_declaration
token_error(p, .str_struct_bad_declaration)
:str_struct_bad_declaration
string Bad declaration in struct.
byte 0
:struct_too_large
token_error(p, .str_struct_too_large)
:str_struct_too_large
string struct too large (maximum is 4GB).
byte 0
:member_no_identifier
; e.g. struct { int; };
token_error(p, .str_member_no_identifier)
:str_member_no_identifier
string No identifier in struct member.
byte 0

:base_type_enum
local q
local expr

*1out = TYPE_INT ; treat any enum as int
out += 1
; commit now: enumerator expressions below may append types of their own
types_bytes_used = out - types

p = base_type + 16 ; skip the enum keyword
if *1p == SYMBOL_LBRACE goto enum_definition
if *1p != TOKEN_IDENTIFIER goto bad_base_type ; e.g. enum int x;
p += 16
if *1p == SYMBOL_LBRACE goto enum_definition
goto base_type_done ; just using an enum type, not defining it.
:enum_definition
local name
local value
value = -1 ; consider initial previous value as -1, because -1 + 1 = 0
p += 16 ; skip opening {
:enum_defn_loop
if *1p == SYMBOL_RBRACE goto enum_defn_loop_end
if *1p != TOKEN_IDENTIFIER goto bad_enum_definition
p += 8
name = *8p
p += 8
if *1p == SYMBOL_COMMA goto enum_defn_no_equals
if *1p == SYMBOL_RBRACE goto enum_defn_no_equals
if *1p != SYMBOL_EQ goto bad_enum_definition ; e.g. enum { X ! };
; value provided, e.g. X = 5,
p += 16
depth = 0 ; parenthesis depth
q = p
; find matching comma/right brace
; -- yes, a comma can appear in an enumerator expression, e.g.
;      enum { X = sizeof(struct{int x, y;}) };
; or   enum { X = (enum {A,B})3 };

; find associated comma or right-brace
:enum_comma_loop
if depth > 0 goto enum_comma_deep
if *1q == SYMBOL_COMMA goto enum_comma_loop_end
if *1q == SYMBOL_RBRACE goto enum_comma_loop_end
:enum_comma_deep
if *1q == TOKEN_EOF goto bad_base_type
c = *1q
q += 16
if c == SYMBOL_LPAREN goto enum_comma_incdepth
if c == SYMBOL_RPAREN goto enum_comma_decdepth
goto enum_comma_loop
:enum_comma_incdepth
depth += 1
goto enum_comma_loop
:enum_comma_decdepth
depth -= 1
goto enum_comma_loop
:enum_comma_loop_end
; p..q is the enumerator's value expression
expr = malloc(4000)
parse_expression(p, q, expr)
evaluate_constant_expression(p, expr, &value)
free(expr)
; the value must fit in a 32-bit signed int
if value < -0x80000000 goto bad_enumerator
if value > 0x7fffffff goto bad_enumerator
ident_list_add(enumerators, name, value)
p = q
if *1p == SYMBOL_RBRACE goto enum_defn_loop_end
p += 16 ; skip ,
goto enum_defn_loop
:bad_enumerator
token_error(p, .str_bad_enumerator)
:str_bad_enumerator
string Enumerators too large for int.
byte 0
:enum_defn_no_equals
; no value provided, e.g. X,
; the value of this enumerator is one more than the value of the last one
value += 1
ident_list_add(enumerators, name, value)
if *1p == SYMBOL_RBRACE goto enum_defn_loop_end
p += 16 ; skip ,
goto enum_defn_loop
:enum_defn_loop_end
out = types + types_bytes_used ; fix stuff in case there were any types in the enumerator expressions
goto base_type_done
:bad_enum_definition
token_error(base_type, .str_bad_enum_defn)
:str_bad_enum_defn
string Bad enum definition.
byte 0

:base_type_float
*1out = TYPE_FLOAT
out += 1
goto base_type_done
:base_type_void
*1out = TYPE_VOID
out += 1
goto base_type_done
:base_type_typedef
; look the name up in typedefs and copy the whole encoded type it refers to
p = base_type + 8
c = ident_list_lookup(typedefs, *8p)
if c == 0 goto bad_base_type
local len
len = type_length(c)
c += types
memcpy(out, c, len)
out += len
goto base_type_done

:bad_base_type
token_error(base_type, .str_bad_base_type)
:str_bad_base_type
string Bad base type.
byte 0
2022-01-15 10:35:36 -05:00
; how many bytes does it take to encode this type?
; `type` is an offset into `types`. Encoded lengths:
;   tags up to TYPE_DOUBLE   1 byte
;   TYPE_POINTER             1 byte + the pointed-to type
;   TYPE_ARRAY               1 byte + 8-byte size + the element type
;   TYPE_STRUCT              1 byte + 8-byte value, as written by parse_base_type
;   TYPE_FUNCTION            1 byte + each parameter type + a 0 byte + the type that follows
; Any other tag prints an error to file descriptor 2 and exits.
function type_length
argument type
local p
local n
local start
p = types + type
if *1p <= TYPE_DOUBLE goto return_1
if *1p != TYPE_POINTER goto type_length_not_pointer
type += 1
n = type_length(type)
return n + 1
:type_length_not_pointer
if *1p != TYPE_ARRAY goto type_length_not_array
type += 9 ; skip the tag and the 8-byte size
n = type_length(type)
return n + 9
:type_length_not_array
if *1p != TYPE_STRUCT goto type_length_not_struct
; parse_base_type stores a struct as the tag byte plus 8 bytes (*8out = ...),
; so the length is 9; reporting fewer would truncate copies of struct types
return 9
:type_length_not_struct
if *1p != TYPE_FUNCTION goto type_length_not_function
start = type
type += 1 ; skip the tag
:type_length_function_loop
; skip each parameter type until the terminating 0 byte
p = types + type
if *1p == 0 goto type_length_function_loop_end
type += type_length(type)
goto type_length_function_loop
:type_length_function_loop_end
type += 1 ; skip the 0 byte
type += type_length(type) ; the type following the parameter list
return type - start
:type_length_not_function
fputs(2, .str_type_length_bad_type)
exit(1)
:str_type_length_bad_type
string Bad type passed to type_length. This shouldn't happen.
byte 10
byte 0
; Copy the encoded type at offset `src` in `types` to offset `dest` in `types`.
; Both arguments are offsets, not addresses.
; returns length of type (the number of bytes copied)
function type_copy_ids
argument dest
argument src
local len
local from
local to
len = type_length(src)
; turn the offsets into addresses for memcpy
from = types + src
to = types + dest
memcpy(to, from, len)
return len
2022-01-14 22:16:57 -05:00
; Append a "pointer to `type`" type at the end of `types`:
; a TYPE_POINTER byte followed by a copy of the type at offset `type`.
; Returns the offset of the new type and advances types_bytes_used past it.
function type_create_pointer
argument type
local result
local dst
local copied
result = types_bytes_used
dst = types + result
*1dst = TYPE_POINTER
types_bytes_used += 1
; type_copy_ids takes offsets into types, not addresses
dst = result + 1
copied = type_copy_ids(dst, type)
types_bytes_used += copied
return result
2022-01-14 22:16:57 -05:00
2022-01-13 16:12:28 -05:00
; parse the expression consisting of the tokens in [tokens, tokens_end), and
; write the parsed expression to out. returns the end of the written expression.
;   - each token is 16 bytes; byte 0 is the token kind and bytes 8-15 hold its
;     value (for identifiers, this is what gets passed to ident_list_lookup)
;   - each written expression starts with a 1-byte kind at offset 0 and a 4-byte
;     type ID at offset 4; operands / constant values start at offset 8
; errors (via token_error or exit) rather than returning if the expression is invalid.
; NOTE: the labels return_type_int, return_type_long, etc. at the end of this
; function are jumped to from expr_binary_type_usual_conversions.
function parse_expression
argument tokens
argument tokens_end
argument out
local in
local a
local b
local c
local p
local n
local type
local best
local best_precedence
local depth
local value
local first_token
:parse_expression_top
;print_tokens(tokens, tokens_end)

; where the type ID of this expression gets written
type = out + 4
if tokens == tokens_end goto empty_expression
p = tokens + 16
if p == tokens_end goto single_token_expression
if *1tokens != SYMBOL_LPAREN goto parse_expression_not_entirely_in_parens
p = tokens_end - 16
if *1p != SYMBOL_RPAREN goto parse_expression_not_entirely_in_parens

; the expression starts with ( and ends with ), but that doesn't mean they match, e.g. (a)+(b)
depth = 1 ; bracket depth
p = tokens + 16
a = tokens_end - 16 ; stop point
:expr_paren_check_loop
if p >= a goto expr_paren_check_loop_end
c = *1p
p += 16
if c == SYMBOL_LPAREN goto expr_paren_check_loop_incdepth
if c == SYMBOL_RPAREN goto expr_paren_check_loop_decdepth
goto expr_paren_check_loop
:expr_paren_check_loop_incdepth
depth += 1
goto expr_paren_check_loop
:expr_paren_check_loop_decdepth
depth -= 1
; the opening parenthesis was closed before the last token
if depth == 0 goto parse_expression_not_entirely_in_parens
goto expr_paren_check_loop
:expr_paren_check_loop_end
; if we made it this far, the expression is entirely in parenthesis, e.g. (x+2)
; strip the parentheses and start over
tokens += 16
tokens_end -= 16
goto parse_expression_top
:parse_expression_not_entirely_in_parens
; look for the operator with the lowest precedence not in brackets
depth = 0 ; paren/square bracket depth
first_token = 1
p = tokens
best = 0
best_precedence = 1000
goto expr_find_operator_loop_first
:expr_find_operator_loop
first_token = 0
:expr_find_operator_loop_first
if p >= tokens_end goto expr_find_operator_loop_end
n = p
c = *1p
p += 16
if depth > 0 goto expr_findop_not_new_best
if depth < 0 goto expr_too_many_closing_brackets
a = operator_precedence(n, first_token)
n = a
if a == 0xe0 goto select_leftmost ; ensure that the leftmost unary operator is processed first
b = operator_right_associative(c)
if b != 0 goto select_leftmost ; ensure that the leftmost += / -= / etc. is processed first
goto select_rightmost
:select_leftmost
; with n = a + 1, a later operator of equal precedence fails the check below,
; so the leftmost one stays selected
n += 1
; fallthrough
:select_rightmost
if n > best_precedence goto expr_findop_not_new_best
; new best!
best = p - 16
;putc('O)
;putc(':)
;putn(*1best)
;putc(32)
;putc('P)
;putc(':)
;putnln(a)
best_precedence = a
:expr_findop_not_new_best
if c == SYMBOL_LPAREN goto expr_findop_incdepth
if c == SYMBOL_RPAREN goto expr_findop_decdepth
if c == SYMBOL_LSQUARE goto expr_findop_incdepth
if c == SYMBOL_RSQUARE goto expr_findop_decdepth
goto expr_find_operator_loop
:expr_findop_incdepth
depth += 1
goto expr_find_operator_loop
:expr_findop_decdepth
depth -= 1
goto expr_find_operator_loop
:expr_find_operator_loop_end

if best == 0 goto unrecognized_expression

n = best - tokens
c = *1best

; if the selected operator is the first token, it must be a unary operator
if best == tokens goto parse_expr_unary
; it's a binary expression.
if c == SYMBOL_PLUS_PLUS goto parse_postincrement
if c == SYMBOL_MINUS_MINUS goto parse_postdecrement
if c == SYMBOL_QUESTION goto parse_conditional
*1out = binop_symbol_to_expression_type(c)
c = *1out
out += 8
if c == EXPRESSION_DOT goto parse_expr_member
if c == EXPRESSION_ARROW goto parse_expr_member
a = out + 4 ; type of first operand
out = parse_expression(tokens, best, out) ; first operand
p = best + 16
b = out + 4 ; type of second operand
if c != EXPRESSION_SUBSCRIPT goto binary_not_subscript
; for a[b], the second operand is everything between [ and the final ]
tokens_end -= 16
if *1tokens_end != SYMBOL_RSQUARE goto unrecognized_expression
:binary_not_subscript

out = parse_expression(p, tokens_end, out) ; second operand

; now figure out the type of this expression
if c == EXPRESSION_LSHIFT goto type_shift
if c == EXPRESSION_RSHIFT goto type_shift
if c == EXPRESSION_SUBSCRIPT goto type_subscript
if c == EXPRESSION_EQ goto type_int
if c == EXPRESSION_NEQ goto type_int
if c == EXPRESSION_LEQ goto type_int
if c == EXPRESSION_GEQ goto type_int
if c == EXPRESSION_LT goto type_int
if c == EXPRESSION_GT goto type_int
if c == EXPRESSION_COMMA goto type_binary_right
; plain assignment has the type of its left operand.
; (this previously tested EXPRESSION_EQ a second time, which could never match
; since == is handled above, so plain assignment ended up in the "bad binop" error.)
if c == EXPRESSION_ASSIGN goto type_binary_left
if c == EXPRESSION_ASSIGN_ADD goto type_binary_left
if c == EXPRESSION_ASSIGN_SUB goto type_binary_left
if c == EXPRESSION_ASSIGN_MUL goto type_binary_left
if c == EXPRESSION_ASSIGN_DIV goto type_binary_left
if c == EXPRESSION_ASSIGN_REMAINDER goto type_binary_left
if c == EXPRESSION_ASSIGN_AND goto type_binary_left_integer
if c == EXPRESSION_ASSIGN_XOR goto type_binary_left_integer
if c == EXPRESSION_ASSIGN_OR goto type_binary_left_integer
if c == EXPRESSION_ASSIGN_LSHIFT goto type_binary_left_integer
if c == EXPRESSION_ASSIGN_RSHIFT goto type_binary_left_integer
if c == EXPRESSION_LOGICAL_OR goto type_int
if c == EXPRESSION_LOGICAL_AND goto type_int
if c == EXPRESSION_BITWISE_AND goto type_binary_usual_integer
if c == EXPRESSION_BITWISE_XOR goto type_binary_usual_integer
if c == EXPRESSION_BITWISE_OR goto type_binary_usual_integer
if c == EXPRESSION_ADD goto type_plus
if c == EXPRESSION_SUB goto type_minus
if c == EXPRESSION_MUL goto type_binary_usual
if c == EXPRESSION_DIV goto type_binary_usual
if c == EXPRESSION_REMAINDER goto type_binary_usual_integer
fputs(2, .str_binop_this_shouldnt_happen)
exit(1)
:str_binop_this_shouldnt_happen
string Bad binop symbol (this shouldn't happen).
byte 10
byte 0
:type_plus
p = types + *4a
if *1p == TYPE_POINTER goto type_binary_left ; pointer plus integer
p = types + *4b
if *1p == TYPE_POINTER goto type_binary_right ; integer plus pointer
goto type_binary_usual
:type_minus
p = types + *4a
if *1p == TYPE_POINTER goto type_minus_left_ptr
goto type_binary_usual
:type_minus_left_ptr
p = types + *4b
if *1p == TYPE_POINTER goto type_long ; pointer difference
goto type_binary_left ; pointer minus integer
:type_subscript
p = types + *4a
if *1p == TYPE_POINTER goto type_subscript_pointer
if *1p == TYPE_ARRAY goto type_subscript_array
goto subscript_bad_type
:type_subscript_pointer
; the pointed-to type starts 1 byte after the TYPE_POINTER byte
*4type = *4a + 1
return out
:type_subscript_array
; the element type starts 9 bytes after the TYPE_ARRAY byte
*4type = *4a + 9
return out
:subscript_bad_type
token_error(tokens, .str_subscript_bad_type)
:str_subscript_bad_type
string Subscript of non-pointer type.
byte 0
; apply the "usual conversions"
:type_binary_usual
*4type = expr_binary_type_usual_conversions(tokens, *4a, *4b)
return out
; like type_binary_usual, but the operands must be integers
:type_binary_usual_integer
*4type = expr_binary_type_usual_conversions(tokens, *4a, *4b)
p = types + *4type
if *1p >= TYPE_FLOAT goto expr_binary_bad_types
return out
; like type_binary_left, but both operands must have a type below TYPE_FLOAT
:type_binary_left_integer
p = types + *4a
if *1p >= TYPE_FLOAT goto expr_binary_bad_types
p = types + *4b
if *1p >= TYPE_FLOAT goto expr_binary_bad_types
goto type_binary_left
; the type of this expression is the type of the left operand
:type_binary_left
*4type = *4a
return out
; the type of this expression is the type of the right operand
:type_binary_right
*4type = *4b
return out
; both operands must have a type below TYPE_FLOAT; the result is the promoted left operand type
:type_shift
p = types + *4a
if *1p >= TYPE_FLOAT goto expr_binary_bad_types
p = types + *4b
if *1p >= TYPE_FLOAT goto expr_binary_bad_types
*4type = type_promotion(*4a)
return out
; the type here is just int
:type_int
*4type = TYPE_INT
return out
:type_long
*4type = TYPE_LONG
return out
:expr_binary_bad_types
bad_types_to_operator(tokens, *4a, *4b)
:parse_expr_unary
if c == KEYWORD_SIZEOF goto parse_sizeof
*1out = unary_op_to_expression_type(c)
c = *1out
if c == EXPRESSION_CAST goto parse_cast
out += 8
a = out + 4 ; type of operand
p = tokens + 16
out = parse_expression(p, tokens_end, out)
p = types + *4a
if c == EXPRESSION_BITWISE_NOT goto unary_type_integral
if c == EXPRESSION_UNARY_PLUS goto unary_type_promote
if c == EXPRESSION_UNARY_MINUS goto unary_type_promote
if c == EXPRESSION_LOGICAL_NOT goto unary_type_logical_not
if c == EXPRESSION_ADDRESS_OF goto unary_address_of
if c == EXPRESSION_DEREFERENCE goto unary_dereference
if c == EXPRESSION_PRE_INCREMENT goto unary_type_arithmetic_nopromote
if c == EXPRESSION_PRE_DECREMENT goto unary_type_arithmetic_nopromote
fputs(2, .str_unop_this_shouldnt_happen)
exit(1)
:str_unop_this_shouldnt_happen
string Bad unary symbol (this shouldn't happen).
byte 10
byte 0
:parse_cast
local cast_base_type
local cast_prefix
local cast_suffix
local cast_suffix_end
; the type being cast to starts right after the opening parenthesis
cast_base_type = best + 16
cast_prefix = type_get_base_end(cast_base_type)
cast_suffix = type_get_prefix_end(cast_prefix)
cast_suffix_end = type_get_suffix_end(cast_prefix)
; the ID of the type we are about to add to the types array
a = types_bytes_used
parse_type_declarators(cast_prefix, cast_suffix, cast_suffix, cast_suffix_end)
parse_base_type(cast_base_type)
p = cast_suffix_end
if *1p != SYMBOL_RPAREN goto bad_cast ; e.g. (int ,)5
; the type of a cast expression is the type being cast to
out += 4
*4out = a
out += 4
p += 16
out = parse_expression(p, tokens_end, out)
return out
:bad_cast
token_error(tokens, .str_bad_cast)
:str_bad_cast
string Bad cast.
byte 0
:unary_address_of
*4type = type_create_pointer(*4a)
return out
:unary_dereference
if *1p != TYPE_POINTER goto unary_bad_type
; the pointed-to type starts 1 byte after the TYPE_POINTER byte
*4type = *4a + 1
return out
:unary_type_logical_not
if *1p > TYPE_POINTER goto unary_bad_type
*4type = TYPE_INT
return out
:unary_type_integral
if *1p >= TYPE_FLOAT goto unary_bad_type
goto unary_type_promote
:unary_type_promote
if *1p > TYPE_DOUBLE goto unary_bad_type
*4type = type_promotion(*4a)
return out
:unary_type_arithmetic_nopromote
if *1p > TYPE_DOUBLE goto unary_bad_type
*4type = *4a
return out
:unary_bad_type
fprint_token_location(1, tokens)
puts(.str_unary_bad_type)
print_type(*4a)
putc(10)
exit(1)
:str_unary_bad_type
string : Bad type for unary operator:
byte 32
byte 0
:parse_sizeof
local sizeof_base_type
local sizeof_prefix
local sizeof_suffix
local sizeof_suffix_end
; sizeof is turned into an integer constant of type unsigned long
*1out = EXPRESSION_CONSTANT_INT
out += 4
; the type ID is a 4-byte field (it is read with *4 everywhere), so write all 4 bytes
; rather than leaving whatever was previously in the upper 3 bytes of the output buffer
*4out = TYPE_UNSIGNED_LONG
out += 4
p = best + 16
if *1p != SYMBOL_LPAREN goto parse_sizeof_expr
p += 16
b = token_is_type(p)
if b == 0 goto parse_sizeof_expr
; it's a type, e.g. sizeof(int)
sizeof_base_type = p
sizeof_prefix = type_get_base_end(sizeof_base_type)
sizeof_suffix = type_get_prefix_end(sizeof_prefix)
sizeof_suffix_end = type_get_suffix_end(sizeof_prefix)
p = sizeof_suffix_end
; the ID of the type we are about to add to the types array
a = types_bytes_used
parse_type_declarators(sizeof_prefix, sizeof_suffix, sizeof_suffix, sizeof_suffix_end)
parse_base_type(sizeof_base_type)
if *1p != SYMBOL_RPAREN goto bad_expression ; e.g. sizeof(int ,)
*8out = type_sizeof(a)
goto parse_sizeof_finish
:parse_sizeof_expr
; it's an expression, e.g. sizeof(x+3)
; parse the operand into a temporary buffer; we only want its type
local temp
temp = malloc(4000)
p = best + 16
parse_expression(p, tokens_end, temp)
p = temp + 4
*8out = type_sizeof(*4p)
free(temp)
:parse_sizeof_finish
out += 8
return out
:parse_expr_member ; -> or .
p = best + 16
if *1p != TOKEN_IDENTIFIER goto bad_expression
a = out + 4 ; pointer to type ID
out = parse_expression(tokens, best, out)
a = types + *4a
if c == EXPRESSION_DOT goto type_dot
if *1a != TYPE_POINTER goto arrow_non_pointer
; skip the TYPE_POINTER byte to get to the pointed-to type
a += 1
:type_dot
if *1a != TYPE_STRUCT goto member_non_struct
a += 1
a = *8a ; pointer to struct data
p += 8
c = ident_list_lookup(a, *8p)
if c == 0 goto member_not_in_struct
; the looked-up value has the member's offset in its low 32 bits, and its type in the bits above
*8out = c & 0xffffffff ; offset
*4type = c > 32 ; type
out += 8
p += 8
if p != tokens_end goto bad_expression ; e.g. foo->bar hello
return out
:arrow_non_pointer
token_error(p, .str_arrow_non_pointer)
:str_arrow_non_pointer
string Trying to use -> operator on a non-pointer type.
byte 0
:member_non_struct
token_error(p, .str_member_non_struct)
:str_member_non_struct
string Trying to access member of something other than a (complete) structure/union.
byte 0
:member_not_in_struct
token_error(p, .str_member_not_in_struct)
:str_member_not_in_struct
string Trying to access non-existent member of structure or union.
byte 0
:parse_conditional
depth = 0 ; bracket depth
n = 0 ; ? : depth
; find : associated with this ?
p = best + 16
:parse_conditional_loop
if p >= tokens_end goto bad_expression
if *1p == SYMBOL_QUESTION goto parse_cond_incn
if *1p == SYMBOL_COLON goto parse_cond_decn
if *1p == SYMBOL_LPAREN goto parse_cond_incdepth
if *1p == SYMBOL_RPAREN goto parse_cond_decdepth
if *1p == SYMBOL_LSQUARE goto parse_cond_incdepth
if *1p == SYMBOL_RSQUARE goto parse_cond_decdepth
:parse_cond_cont
p += 16
goto parse_conditional_loop
:parse_cond_incdepth
depth += 1
goto parse_cond_cont
:parse_cond_decdepth
depth -= 1
goto parse_cond_cont
:parse_cond_incn
n += 1
goto parse_cond_cont
:parse_cond_decn
n -= 1
if n >= 0 goto parse_cond_cont
if depth > 0 goto parse_cond_cont
; okay, p now points to the :
*1out = EXPRESSION_CONDITIONAL
out += 8
out = parse_expression(tokens, best, out) ; the condition
a = out + 4 ; type of left branch of conditional
best += 16
out = parse_expression(best, p, out)
b = out + 4 ; type of right branch of conditional
p += 16
out = parse_expression(p, tokens_end, out)
p = types + *4a
if *1p == TYPE_STRUCT goto parse_cond_ltype
if *1p == TYPE_VOID goto parse_cond_ltype
if *1p == TYPE_POINTER goto parse_cond_ltype ; @NONSTANDARD: we don't handle sizeof *(0 ? (void*)0 : "hello") correctly--it should be 1 (a standard-compliant implementation is annoyingly complicated)
*4type = expr_binary_type_usual_conversions(tokens, *4a, *4b)
return out
:parse_cond_ltype
; no conversions
*4type = *4a
return out
:parse_postincrement
*1out = EXPRESSION_POST_INCREMENT
p = tokens_end - 16
if *1p != SYMBOL_PLUS_PLUS goto bad_expression ; e.g. a ++ b
out += 8
a = out + 4 ; type of operand
out = parse_expression(tokens, p, out)
*4type = *4a ; this expression's type is the operand's type (yes even for types smaller than int)
return out

:parse_postdecrement
*1out = EXPRESSION_POST_DECREMENT
p = tokens_end - 16
if *1p != SYMBOL_MINUS_MINUS goto bad_expression ; e.g. a -- b
out += 8
a = out + 4 ; type of operand
out = parse_expression(tokens, p, out)
*4type = *4a ; type of this = type of operand
return out

:single_token_expression
in = tokens
c = *1in
if c == TOKEN_CONSTANT_INT goto expression_integer
if c == TOKEN_CONSTANT_CHAR goto expression_integer ; character constants are basically the same as integer constants
if c == TOKEN_CONSTANT_FLOAT goto expression_float
if c == TOKEN_STRING_LITERAL goto expression_string_literal
if c == TOKEN_IDENTIFIER goto expression_identifier
goto unrecognized_expression
:expression_identifier
in += 8
a = *8in
in += 8
; check if it's an enumerator
c = ident_list_lookup_check(enumerators, a, &n)
if c == 0 goto not_enumerator
; it is an enumerator
; an enumerator becomes an int constant whose value is the one that was looked up
*1out = EXPRESSION_CONSTANT_INT
out += 4
*4out = TYPE_INT
out += 4
*8out = n
out += 8
return out
:not_enumerator
; go back to the start of the identifier token for the error message
in -= 16
token_error(in, .str_undeclared_variable)
:str_undeclared_variable
string Undeclared variable.
byte 0
:expression_integer
*1out = EXPRESSION_CONSTANT_INT
; copy the value from the token to the expression
p = in + 8
value = *8p
p = out + 8
*8p = value
p = in + 1
a = int_suffix_to_type(*1p) ; what the suffix says the type should be
b = int_value_to_type(value) ; what the value says the type should be (if the value is too large to fit in int)
a = max_signed(a, b) ; take the maximum of the two types
; make sure that if the integer has a u suffix, the type will be unsigned
a &= b | 0xfe
p = out + 4
*4p = a
in += 16
out += 16
return out
:expression_float
*1out = EXPRESSION_CONSTANT_FLOAT
; copy the value from the token to the expression
p = in + 8
value = *8p
p = out + 8
*8p = value
p = in + 1
a = float_suffix_to_type(*1p)
p = out + 4
*4p = a
in += 16
out += 16
return out
:expression_string_literal
*1out = EXPRESSION_STRING_LITERAL
; copy the value from the token to the expression
p = in + 8
value = *8p
p = out + 8
*8p = value
; we already know this is char*
p = out + 4
*4p = TYPE_POINTER_TO_CHAR
in += 16
out += 16
return out
:empty_expression
token_error(tokens, .str_empty_expression)
:str_empty_expression
string Empty expression.
byte 0
:bad_expression
token_error(tokens, .str_bad_expression)
:str_bad_expression
string Bad expression.
byte 0
:unrecognized_expression
token_error(tokens, .str_unrecognized_expression)
:str_unrecognized_expression
string Unrecognized expression.
byte 0
:expr_too_many_closing_brackets
token_error(tokens, .str_too_many_closing_brackets)
:str_too_many_closing_brackets
string Too many closing brackets.
byte 0
; shared return labels
:return_type_int
return TYPE_INT
:return_type_long
return TYPE_LONG
:return_type_unsigned_int
return TYPE_UNSIGNED_INT
:return_type_unsigned_long
return TYPE_UNSIGNED_LONG
:return_type_float
return TYPE_FLOAT
:return_type_double
return TYPE_DOUBLE
2022-01-13 18:13:29 -05:00
2022-01-24 13:22:16 -05:00
; return the size in bytes of the type with the given ID (offset into the types array).
; prints an error and exits for kinds of type not listed below.
function type_sizeof
argument type
local p
local c
local n
p = types + type
c = *1p
; 1 byte
if c == TYPE_CHAR goto return_1
if c == TYPE_UNSIGNED_CHAR goto return_1
if c == TYPE_VOID goto return_1
; 2 bytes
if c == TYPE_SHORT goto return_2
if c == TYPE_UNSIGNED_SHORT goto return_2
; 4 bytes
if c == TYPE_INT goto return_4
if c == TYPE_UNSIGNED_INT goto return_4
if c == TYPE_FLOAT goto return_4
; 8 bytes
if c == TYPE_LONG goto return_8
if c == TYPE_UNSIGNED_LONG goto return_8
if c == TYPE_DOUBLE goto return_8
if c == TYPE_POINTER goto return_8
if c == TYPE_FUNCTION goto return_8
; size depends on the element type
if c == TYPE_ARRAY goto sizeof_array
fputs(2, .str_sizeof_ni) ; @TODO
exit(1)
:str_sizeof_ni
string type_sizeof for this type not implemented.
byte 0
:sizeof_array
; the TYPE_ARRAY byte is followed by the 8-byte array size, then the element type
p += 1
n = *8p
; ID of the element type: skip TYPE_ARRAY (1 byte) and the size (8 bytes)
c = type + 9
c = type_sizeof(c)
return n * c
2022-01-26 18:00:47 -05:00
; return the alignment in bytes of the type with the given ID (offset into the types array).
; prints an error and exits for kinds of type not listed below.
function type_alignof
argument type
local p
local c
p = types + type
c = *1p
; 1 byte
if c == TYPE_CHAR goto return_1
if c == TYPE_UNSIGNED_CHAR goto return_1
if c == TYPE_VOID goto return_1
; 2 bytes
if c == TYPE_SHORT goto return_2
if c == TYPE_UNSIGNED_SHORT goto return_2
; 4 bytes
if c == TYPE_INT goto return_4
if c == TYPE_UNSIGNED_INT goto return_4
if c == TYPE_FLOAT goto return_4
; 8 bytes
if c == TYPE_LONG goto return_8
if c == TYPE_UNSIGNED_LONG goto return_8
if c == TYPE_DOUBLE goto return_8
if c == TYPE_POINTER goto return_8
if c == TYPE_FUNCTION goto return_8
; same alignment as the element type
if c == TYPE_ARRAY goto alignof_array
fputs(2, .str_alignof_ni) ; @TODO
exit(1)
:str_alignof_ni
string type_alignof for this type not implemented.
byte 0
:alignof_array
type += 9 ; skip TYPE_ARRAY and size
return type_alignof(type)
; evaluate an expression which can be the size of an array, e.g.
;    enum { A, B, C };
;    int x[A * sizeof(float) + 3 << 5];
; @NONSTANDARD: doesn't handle floats, but really why would you use floats in an array size
;    e.g. SomeType x[(int)3.3];
; this is also used for #if evaluation
; token is used for error messages (e.g. if this "constant" expression is *x or something)
; expr points to a parsed expression: a 1-byte kind at offset 0, a 4-byte type ID
; at offset 4, and the operands (or the 8-byte value of a constant) starting at offset 8.
; NOTE: this returns the end of the expression, not the value (which is stored in *8p_value)
; NOTE: every operand is always evaluated, including the right operand of && / || and
; the branch of ?: which isn't taken, since evaluating an operand is how we find where it ends.
function evaluate_constant_expression
argument token
argument expr
argument p_value
local a
local b
local c
local p
local mask
local type
type = expr + 4
type = *4type
c = *1expr
if c == EXPRESSION_CONSTANT_INT goto eval_constant_int
if c == EXPRESSION_IDENTIFIER goto eval_constant_identifier
if c == EXPRESSION_UNARY_PLUS goto eval_unary_plus
if c == EXPRESSION_UNARY_MINUS goto eval_unary_minus
if c == EXPRESSION_BITWISE_NOT goto eval_bitwise_not
if c == EXPRESSION_LOGICAL_NOT goto eval_logical_not
if c == EXPRESSION_CAST goto eval_cast
if c == EXPRESSION_ADD goto eval_add
if c == EXPRESSION_SUB goto eval_sub
if c == EXPRESSION_MUL goto eval_mul
if c == EXPRESSION_DIV goto eval_div
if c == EXPRESSION_REMAINDER goto eval_remainder
if c == EXPRESSION_LSHIFT goto eval_lshift
if c == EXPRESSION_RSHIFT goto eval_rshift
if c == EXPRESSION_EQ goto eval_eq
if c == EXPRESSION_NEQ goto eval_neq
if c == EXPRESSION_LT goto eval_lt
if c == EXPRESSION_GT goto eval_gt
if c == EXPRESSION_LEQ goto eval_leq
if c == EXPRESSION_GEQ goto eval_geq
if c == EXPRESSION_BITWISE_AND goto eval_bitwise_and
if c == EXPRESSION_BITWISE_OR goto eval_bitwise_or
if c == EXPRESSION_BITWISE_XOR goto eval_bitwise_xor
if c == EXPRESSION_LOGICAL_AND goto eval_logical_and
if c == EXPRESSION_LOGICAL_OR goto eval_logical_or
if c == EXPRESSION_CONDITIONAL goto eval_conditional

token_error(token, .str_eval_bad_exprtype)
:str_eval_bad_exprtype
string Can't evaluate constant expression.
byte 0
:eval_cast
; only casts to integer types are supported
p = types + type
if *1p == TYPE_VOID goto eval_cast_bad_type
if *1p > TYPE_UNSIGNED_LONG goto eval_cast_bad_type
expr += 8
; @NONSTANDARD: we don't support, for example, int x[(int)(float)5];
expr = evaluate_constant_expression(token, expr, p_value)
goto eval_fit_to_type
:eval_cast_bad_type
token_error(token, .str_eval_cast_bad_type)
:str_eval_cast_bad_type
string Bad type for constant cast (note: floating-point casts are not supported even though they are standard).
byte 0
:eval_constant_identifier
; @TODO: enum values
fputs(2, .str_constant_identifier)
exit(1)
:str_constant_identifier
string Constant identifiers not handled (see @TODO).
byte 10
byte 0
:eval_constant_int
; the 8-byte value comes right after the 8-byte header
expr += 8
*8p_value = *8expr
expr += 8
return expr
:eval_unary_plus
expr += 8
expr = evaluate_constant_expression(token, expr, p_value)
return expr
:eval_unary_minus
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
*8p_value = 0 - a
goto eval_fit_to_type
:eval_bitwise_not
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
*8p_value = ~a
goto eval_fit_to_type
:eval_logical_not
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
if a == 0 goto eval_value_1
goto eval_value_0
:eval_add
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
*8p_value = a + b
goto eval_fit_to_type
:eval_sub
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
*8p_value = a - b
goto eval_fit_to_type
:eval_mul
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
*8p_value = a * b
goto eval_fit_to_type
:eval_div
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
; p must point to this expression's type before we check it
; (this assignment was missing, so an unset p was being dereferenced)
p = types + type
if *1p == TYPE_UNSIGNED_LONG goto eval_div_unsigned
; division is signed or uses a small type, so we can use 64-bit signed division
*8p_value = a / b
goto eval_fit_to_type
:eval_div_unsigned
; must use unsigned division
divmod_unsigned(a, b, p_value, &a)
goto eval_fit_to_type
:eval_remainder
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
p = types + type
if *1p == TYPE_UNSIGNED_LONG goto eval_rem_unsigned
*8p_value = a % b
goto eval_fit_to_type
:eval_rem_unsigned
divmod_unsigned(a, b, &a, p_value)
goto eval_fit_to_type
:eval_lshift
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
*8p_value = a < b
goto eval_fit_to_type
:eval_rshift
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
p = types + type
p = *1p
p &= 1 ; signed types are odd
if p == 1 goto eval_signed_rshift
:eval_rshift_no_sign_extend
*8p_value = a > b
goto eval_fit_to_type
:eval_signed_rshift
local v
mask = a > 63 ; sign bit
; if a is non-negative, there is nothing to sign-extend.
; (without this check, mask would go 0 -> -1 below and the top b bits
; would wrongly be set, which fit_to_type can't undo for long.)
if mask == 0 goto eval_rshift_no_sign_extend
; sign extension: set mask to the top b bits
mask <= b
mask -= 1
mask <= 64 - b
v = a > b
v += mask
*8p_value = v
goto eval_fit_to_type

; comparison masks:
;   1 = less than
;   2 = equal to
;   4 = greater than
; e.g. not-equal is 1|4 = 5 because not equal = less than or greater than
:eval_eq
mask = 2
goto eval_comparison
:eval_neq
mask = 5
goto eval_comparison
:eval_lt
mask = 1
goto eval_comparison
:eval_gt
mask = 4
goto eval_comparison
:eval_leq
mask = 3
goto eval_comparison
:eval_geq
mask = 6
goto eval_comparison
:eval_comparison
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
; NOTE(review): type is the type of the comparison expression itself, not of its
; operands -- confirm that this is what should decide signedness here.
p = types + type
p = *1p
p &= 1
if a == b goto eval_comparison_eq
; for checking < and >, we care about whether a and b are signed
if p == 1 goto eval_signed_comparison
if a ] b goto eval_comparison_gt
goto eval_comparison_lt
:eval_signed_comparison
if a > b goto eval_comparison_gt
goto eval_comparison_lt
:eval_comparison_eq
; a == b
mask &= 2
goto eval_comparison_done
:eval_comparison_lt
; a < b
mask &= 1
goto eval_comparison_done
:eval_comparison_gt
; a > b
mask &= 4
goto eval_comparison_done
:eval_comparison_done
if mask != 0 goto eval_value_1
goto eval_value_0
:eval_bitwise_and
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
*8p_value = a & b
goto eval_fit_to_type
:eval_bitwise_or
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
*8p_value = a | b
goto eval_fit_to_type
:eval_bitwise_xor
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
*8p_value = a ^ b
goto eval_fit_to_type
:eval_logical_and
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
; the right operand has to be evaluated even if a is 0, so that expr ends up
; at the end of the whole && expression. returning early left expr at the start
; of the right operand, so e.g. (0 && x) || y used x in place of y.
expr = evaluate_constant_expression(token, expr, &b)
if a == 0 goto eval_value_0
if b == 0 goto eval_value_0
goto eval_value_1
:eval_logical_or
expr += 8
expr = evaluate_constant_expression(token, expr, &a)
; as with &&, always evaluate the right operand so that the correct end is returned
expr = evaluate_constant_expression(token, expr, &b)
if a != 0 goto eval_value_1
if b != 0 goto eval_value_1
goto eval_value_0
:eval_conditional
expr += 8
; mask is used to hold the value of the condition here
expr = evaluate_constant_expression(token, expr, &mask)
expr = evaluate_constant_expression(token, expr, &a)
expr = evaluate_constant_expression(token, expr, &b)
if mask == 0 goto eval_conditional_b
*8p_value = a
goto eval_fit_to_type
:eval_conditional_b
*8p_value = b
goto eval_fit_to_type
:eval_fit_to_type
; bring the result into the range of this expression's type
*8p_value = fit_to_type(*8p_value, type)
return expr
:eval_value_0
*8p_value = 0
return expr
:eval_value_1
*8p_value = 1
return expr
2022-01-23 10:37:00 -05:00
; given the 64-bit result of some arithmetic, return the value it has once
; it is stored in the integer type with ID `type`:
;    - unsigned types narrower than 64 bits keep only their low bits
;    - signed types narrower than 64 bits keep their low bits, sign-extended to 64 bits
;    - long and unsigned long are returned unchanged
; prints an error and exits for any other kind of type.
function fit_to_type
argument value
argument type
local kind
local sign
kind = types + type
kind = *1kind
if kind == TYPE_LONG goto fit_to_type_long
if kind == TYPE_UNSIGNED_LONG goto fit_to_type_ulong
if kind == TYPE_INT goto fit_to_type_int
if kind == TYPE_UNSIGNED_INT goto fit_to_type_uint
if kind == TYPE_SHORT goto fit_to_type_short
if kind == TYPE_UNSIGNED_SHORT goto fit_to_type_ushort
if kind == TYPE_CHAR goto fit_to_type_char
if kind == TYPE_UNSIGNED_CHAR goto fit_to_type_uchar
fputs(2, .str_bad_fit_to_type)
exit(1)
:str_bad_fit_to_type
string Bad type passed to fit_to_type.
byte 10
byte 0
; signed integer overflow is undefined behavior, and conversion to a signed
; integer is implementation-defined, so strictly speaking anything goes here;
; we do the wrap-around properly anyways.
:fit_to_type_long
:fit_to_type_ulong
; already 64 bits
return value
:fit_to_type_uint
value &= 0xffffffff
return value
:fit_to_type_int
value &= 0xffffffff
sign = value > 31 ; bit 31 is the sign
value += sign * 0xffffffff00000000 ; fill the upper 32 bits with the sign
return value
:fit_to_type_ushort
value &= 0xffff
return value
:fit_to_type_short
value &= 0xffff
sign = value > 15 ; bit 15 is the sign
value += sign * 0xffffffffffff0000 ; fill the upper 48 bits with the sign
return value
:fit_to_type_uchar
value &= 0xff
return value
:fit_to_type_char
value &= 0xff
sign = value > 7 ; bit 7 is the sign
value += sign * 0xffffffffffffff00 ; fill the upper 56 bits with the sign
return value
2022-01-14 15:06:11 -05:00
; the "usual conversions" for binary operators, as the C standard calls it
; given the types of the two operands (offsets into the types array), jumps to the
; return_type_* label for the common type both operands are converted to.
;   token - only used to report a location if the operand types are unacceptable
;   type1, type2 - operand types; if either is 0, control goes to return_0
; any operand whose kind byte is greater than TYPE_DOUBLE is rejected via bad_types_to_operator.
; NOTE(review): return_0 and the return_type_* labels are defined outside this function;
; presumably they return 0 and the ID of the named type, respectively - confirm.
2022-01-14 13:41:44 -05:00
function expr_binary_type_usual_conversions
argument token ; for errors
argument type1
argument type2
local ptype1
local ptype2
2022-01-14 22:16:57 -05:00
local kind1
local kind2
2022-01-14 13:41:44 -05:00
if type1 == 0 goto return_0
if type2 == 0 goto return_0
2022-01-14 15:06:11 -05:00
2022-01-14 13:41:44 -05:00
ptype1 = types + type1
ptype2 = types + type2
2022-01-14 22:16:57 -05:00
; the first byte of an encoded type is its kind
kind1 = *1ptype1
kind2 = *1ptype2
if kind1 > TYPE_DOUBLE goto usual_bad_types_to_operator
if kind2 > TYPE_DOUBLE goto usual_bad_types_to_operator
2022-01-14 13:41:44 -05:00
; "if either operand has type double, the other operand is converted to double"
2022-01-14 22:16:57 -05:00
if kind1 == TYPE_DOUBLE goto return_type_double
if kind2 == TYPE_DOUBLE goto return_type_double
2022-01-14 13:41:44 -05:00
; "if either operand has type float, the other operand is converted to float"
2022-01-14 22:16:57 -05:00
if kind1 == TYPE_FLOAT goto return_type_float
if kind2 == TYPE_FLOAT goto return_type_float
2022-01-14 13:41:44 -05:00
; "If either operand has type unsigned long int, the other operand is converted to unsigned long int"
2022-01-14 22:16:57 -05:00
if kind1 == TYPE_UNSIGNED_LONG goto return_type_unsigned_long
if kind2 == TYPE_UNSIGNED_LONG goto return_type_unsigned_long
2022-01-14 13:41:44 -05:00
; "if either operand has type long int, the other operand is converted to long int"
2022-01-14 22:16:57 -05:00
if kind1 == TYPE_LONG goto return_type_long
if kind2 == TYPE_LONG goto return_type_long
2022-01-14 13:41:44 -05:00
; "if either operand has type unsigned int, the other operand is converted to unsigned int."
2022-01-14 22:16:57 -05:00
if kind1 == TYPE_UNSIGNED_INT goto return_type_unsigned_int
if kind2 == TYPE_UNSIGNED_INT goto return_type_unsigned_int
2022-01-14 13:41:44 -05:00
; "Otherwise, both operands have type int."
goto return_type_int
; separator printed between the two types by bad_types_to_operator
:str_space_and_space
string and
byte 32
byte 0
2022-01-14 15:06:11 -05:00
:usual_bad_types_to_operator
; bad_types_to_operator ends with exit(1), so control is not expected to come back here
; (there is no return after this call; the next line in the file starts that function)
bad_types_to_operator(token, type1, type2)
; report that an operator was applied to operands of unacceptable types, then exit(1).
; output, in order: the location of token (fprint_token_location with descriptor 1),
; .str_bad_types_to_operator, type1, .str_space_and_space, type2, and a newline.
;   token - token whose location is reported
;   type1, type2 - the offending operand types, printed with print_type
; never returns.
function bad_types_to_operator
argument token
argument type1
argument type2
2022-01-14 22:16:57 -05:00
fprint_token_location(1, token)
puts(.str_bad_types_to_operator)
2022-01-14 15:06:11 -05:00
print_type(type1)
2022-01-14 22:16:57 -05:00
puts(.str_space_and_space)
2022-01-14 15:06:11 -05:00
print_type(type2)
; 10 = newline
putc(10)
exit(1)
:str_bad_types_to_operator
string : Bad types to operator:
byte 32
byte 0
2022-01-14 13:41:44 -05:00
; Integer promotion: a type whose kind byte is less than TYPE_INT is replaced by int
; (via return_type_int); any other type is returned unchanged.
; type is an offset into the types array.
function type_promotion
argument type
local kind
kind = types + type
kind = *1kind
if kind < TYPE_INT goto return_type_int
return type
2022-01-14 11:52:26 -05:00
; return precedence of given operator token, or 0xffff if not an operator
;   token - pointer to the token to classify; its first byte is the token kind.
;           the previous/next tokens are reached by stepping 16 bytes back/forward.
;   is_first - nonzero if this is the first token of the expression, in which case
;              the token can only be a unary (prefix) operator
; results are produced by jumping to return_0x.. labels defined outside this function
; (presumably each returns the value in its name - confirm).
; binary and postfix operators give 0x10..0xf0, prefix operators 0xe0, and a cast 0xd8.
2022-01-13 18:13:29 -05:00
function operator_precedence
2022-01-14 11:52:26 -05:00
argument token
argument is_first
local op
local b
2022-01-14 11:52:26 -05:00
if is_first != 0 goto operator_precedence_unary
; if an operator is preceded by another, it must be a unary operator, e.g.
; in 5 + *x, * is a unary operator
; look at the kind of the previous token
op = token - 16
op = *1op
; a preceding ) may close either a cast or a parenthesized operand; needs a closer look
if op == SYMBOL_RPAREN goto figre_out_rparen_arity
2022-01-14 11:52:26 -05:00
op = is_operator(op)
; if an operator is immediately followed by another (including lparen), the second must be
; unary.
2022-01-14 11:52:26 -05:00
if op != 0 goto operator_precedence_unary
:operator_precedence_binary
2022-01-14 11:52:26 -05:00
op = *1token
2022-01-13 18:13:29 -05:00
; see "C OPERATOR PRECEDENCE" in constants.b
if op == SYMBOL_COMMA goto return_0x10
if op == SYMBOL_EQ goto return_0x20
if op == SYMBOL_PLUS_EQ goto return_0x20
if op == SYMBOL_MINUS_EQ goto return_0x20
if op == SYMBOL_TIMES_EQ goto return_0x20
if op == SYMBOL_DIV_EQ goto return_0x20
if op == SYMBOL_PERCENT_EQ goto return_0x20
if op == SYMBOL_LSHIFT_EQ goto return_0x20
if op == SYMBOL_RSHIFT_EQ goto return_0x20
if op == SYMBOL_AND_EQ goto return_0x20
if op == SYMBOL_OR_EQ goto return_0x20
if op == SYMBOL_XOR_EQ goto return_0x20
if op == SYMBOL_QUESTION goto return_0x30
if op == SYMBOL_OR_OR goto return_0x40
if op == SYMBOL_AND_AND goto return_0x50
if op == SYMBOL_OR goto return_0x60
if op == SYMBOL_XOR goto return_0x70
if op == SYMBOL_AND goto return_0x80
if op == SYMBOL_EQ_EQ goto return_0x90
if op == SYMBOL_NOT_EQ goto return_0x90
if op == SYMBOL_LT goto return_0xa0
if op == SYMBOL_GT goto return_0xa0
if op == SYMBOL_LT_EQ goto return_0xa0
if op == SYMBOL_GT_EQ goto return_0xa0
if op == SYMBOL_LSHIFT goto return_0xb0
if op == SYMBOL_RSHIFT goto return_0xb0
if op == SYMBOL_PLUS goto return_0xc0
if op == SYMBOL_MINUS goto return_0xc0
if op == SYMBOL_TIMES goto return_0xd0
if op == SYMBOL_DIV goto return_0xd0
if op == SYMBOL_PERCENT goto return_0xd0
if op == SYMBOL_ARROW goto return_0xf0
if op == SYMBOL_DOT goto return_0xf0
if op == SYMBOL_LPAREN goto return_0xf0 ; function call
if op == SYMBOL_LSQUARE goto return_0xf0 ; subscript
; in binary position, ++ and -- are the postfix forms
if op == SYMBOL_PLUS_PLUS goto return_0xf0
if op == SYMBOL_MINUS_MINUS goto return_0xf0
return 0xffff
2022-01-13 16:12:28 -05:00
2022-01-13 18:13:29 -05:00
:operator_precedence_unary
2022-01-14 11:52:26 -05:00
op = *1token
2022-01-13 20:29:52 -05:00
if op == KEYWORD_SIZEOF goto return_0xe0
2022-01-13 18:13:29 -05:00
if op == SYMBOL_PLUS_PLUS goto return_0xe0
if op == SYMBOL_MINUS_MINUS goto return_0xe0
if op == SYMBOL_AND goto return_0xe0
if op == SYMBOL_TIMES goto return_0xe0
if op == SYMBOL_PLUS goto return_0xe0
if op == SYMBOL_MINUS goto return_0xe0
if op == SYMBOL_TILDE goto return_0xe0
if op == SYMBOL_NOT goto return_0xe0
2022-01-27 10:57:18 -05:00
if op == SYMBOL_LPAREN goto cast_precedence
2022-01-13 18:13:29 -05:00
return 0xffff
2022-01-27 10:57:18 -05:00
:cast_precedence
; make sure this actually is a cast
; this is necessary to handle both
; - (x)->something
; and - (int)x->something
; correctly (in the first case, the arrow is the top-level operator, but in the second, the cast is)
; step to the token after the ( and check whether it starts a type
token += 16
b = token_is_type(token)
if b == 0 goto return_0xffff
goto return_0xd8 ; it's a cast
:figre_out_rparen_arity
; given that the token before this one is a right-parenthesis, figure out if
; this is a unary or binary operator. this is (annoyingly) necessary, because:
; (int)-x; /* cast processed first */
; (y)-x; /* subtraction processed first */
local p
; p starts at the ) before this token
p = token - 16
; NOTE(review): presumably moves p back to the ( matching that ) - confirm against token_reverse_to_matching_lparen
token_reverse_to_matching_lparen(&p)
; first token inside the parentheses
p += 16
b = token_is_type(p)
if b != 0 goto operator_precedence_unary ; e.g. (int)-x;
goto operator_precedence_binary ; e.g. (y)-x;
2022-01-27 10:57:18 -05:00
2022-01-14 10:08:02 -05:00
; map the symbol of a unary (prefix) operator to the corresponding EXPRESSION_* kind.
;   op - token kind of the operator (a SYMBOL_* value)
; returns the EXPRESSION_* kind, or 0 if op is not one of the prefix operators listed here.
; SYMBOL_LPAREN maps to EXPRESSION_CAST; telling a cast apart from a parenthesized
; expression is not done here.
function unary_op_to_expression_type
argument op
if op == SYMBOL_PLUS_PLUS goto return_EXPRESSION_PRE_INCREMENT
if op == SYMBOL_MINUS_MINUS goto return_EXPRESSION_PRE_DECREMENT
if op == SYMBOL_AND goto return_EXPRESSION_ADDRESS_OF
if op == SYMBOL_TIMES goto return_EXPRESSION_DEREFERENCE
if op == SYMBOL_PLUS goto return_EXPRESSION_UNARY_PLUS
if op == SYMBOL_MINUS goto return_EXPRESSION_UNARY_MINUS
if op == SYMBOL_TILDE goto return_EXPRESSION_BITWISE_NOT
if op == SYMBOL_NOT goto return_EXPRESSION_LOGICAL_NOT
if op == SYMBOL_LPAREN goto return_EXPRESSION_CAST
return 0
:return_EXPRESSION_PRE_INCREMENT
return EXPRESSION_PRE_INCREMENT
:return_EXPRESSION_PRE_DECREMENT
; this used to return EXPRESSION_PRE_INCREMENT, which turned --x into ++x
return EXPRESSION_PRE_DECREMENT
:return_EXPRESSION_ADDRESS_OF
return EXPRESSION_ADDRESS_OF
:return_EXPRESSION_DEREFERENCE
return EXPRESSION_DEREFERENCE
:return_EXPRESSION_UNARY_PLUS
return EXPRESSION_UNARY_PLUS
:return_EXPRESSION_UNARY_MINUS
return EXPRESSION_UNARY_MINUS
:return_EXPRESSION_BITWISE_NOT
return EXPRESSION_BITWISE_NOT
:return_EXPRESSION_LOGICAL_NOT
return EXPRESSION_LOGICAL_NOT
:return_EXPRESSION_CAST
return EXPRESSION_CAST
2022-01-14 10:08:02 -05:00
2022-01-13 18:13:29 -05:00
; Does this operator group right-to-left?
; Most C operators are left-associative; the exceptions recognized here are ?: and
; the assignment operators (= += -= etc.), which are detected as the range of
; symbol values from SYMBOL_EQ to SYMBOL_OR_EQ inclusive.
;   op - token kind of the operator
; goes to return_1 if right-associative, return_0 otherwise.
function operator_right_associative
argument op
if op == SYMBOL_QUESTION goto return_1
; anything outside SYMBOL_EQ..SYMBOL_OR_EQ is left-associative
if op > SYMBOL_OR_EQ goto return_0
if op < SYMBOL_EQ goto return_0
goto return_1
2022-01-13 20:29:52 -05:00
; table pairing each binary operator symbol with its expression kind.
; each entry is two bytes: the SYMBOL_* value, then the EXPRESSION_* value.
; the table ends with a pair of zero bytes.
; searched by binop_symbol_to_expression_type and binop_expression_type_to_symbol.
:binop_table
byte SYMBOL_COMMA
byte EXPRESSION_COMMA
byte SYMBOL_EQ
byte EXPRESSION_ASSIGN
byte SYMBOL_PLUS_EQ
byte EXPRESSION_ASSIGN_ADD
byte SYMBOL_MINUS_EQ
byte EXPRESSION_ASSIGN_SUB
byte SYMBOL_TIMES_EQ
byte EXPRESSION_ASSIGN_MUL
byte SYMBOL_DIV_EQ
byte EXPRESSION_ASSIGN_DIV
byte SYMBOL_PERCENT_EQ
byte EXPRESSION_ASSIGN_REMAINDER
byte SYMBOL_LSHIFT_EQ
byte EXPRESSION_ASSIGN_LSHIFT
byte SYMBOL_RSHIFT_EQ
byte EXPRESSION_ASSIGN_RSHIFT
byte SYMBOL_AND_EQ
byte EXPRESSION_ASSIGN_AND
byte SYMBOL_OR_EQ
byte EXPRESSION_ASSIGN_OR
byte SYMBOL_XOR_EQ
byte EXPRESSION_ASSIGN_XOR
byte SYMBOL_OR_OR
byte EXPRESSION_LOGICAL_OR
byte SYMBOL_AND_AND
byte EXPRESSION_LOGICAL_AND
byte SYMBOL_OR
byte EXPRESSION_BITWISE_OR
byte SYMBOL_XOR
byte EXPRESSION_BITWISE_XOR
byte SYMBOL_AND
byte EXPRESSION_BITWISE_AND
byte SYMBOL_EQ_EQ
byte EXPRESSION_EQ
byte SYMBOL_NOT_EQ
byte EXPRESSION_NEQ
byte SYMBOL_LT
byte EXPRESSION_LT
byte SYMBOL_GT
byte EXPRESSION_GT
byte SYMBOL_LT_EQ
byte EXPRESSION_LEQ
byte SYMBOL_GT_EQ
byte EXPRESSION_GEQ
byte SYMBOL_LSHIFT
byte EXPRESSION_LSHIFT
byte SYMBOL_RSHIFT
byte EXPRESSION_RSHIFT
byte SYMBOL_PLUS
byte EXPRESSION_ADD
byte SYMBOL_MINUS
byte EXPRESSION_SUB
byte SYMBOL_TIMES
byte EXPRESSION_MUL
byte SYMBOL_DIV
byte EXPRESSION_DIV
byte SYMBOL_PERCENT
byte EXPRESSION_REMAINDER
byte SYMBOL_ARROW
byte EXPRESSION_ARROW
byte SYMBOL_DOT
byte EXPRESSION_DOT
byte SYMBOL_LSQUARE
byte EXPRESSION_SUBSCRIPT
byte 0
byte 0
; Look up a binary operator symbol in binop_table.
;   op - token kind of the operator (a SYMBOL_* value)
; returns the EXPRESSION_* kind paired with op, or 0 if op is not in the table.
function binop_symbol_to_expression_type
argument op
local entry
entry = .binop_table
:binop_symbol_to_expression_type_loop
if *1entry != op goto binop_symbol_to_expression_type_next
:binop_symbol_to_expression_type_found
; the expression kind is the second byte of the entry
entry += 1
return *1entry
:binop_symbol_to_expression_type_next
; advance to the next 2-byte entry; a zero symbol byte ends the table
entry += 2
if *1entry != 0 goto binop_symbol_to_expression_type_loop
return 0
2022-01-14 11:52:26 -05:00
; Is this token kind an operator of any sort?
;   symbol - token kind to test
; goes to return_1 if symbol is a unary operator (per unary_op_to_expression_type)
; or a binary operator (per binop_symbol_to_expression_type), and to return_0 otherwise.
function is_operator
argument symbol
local kind
kind = unary_op_to_expression_type(symbol)
if kind != 0 goto return_1
kind = binop_symbol_to_expression_type(symbol)
if kind != 0 goto return_1
goto return_0
2022-01-13 20:29:52 -05:00
; Reverse lookup in binop_table.
;   exprtype - an EXPRESSION_* kind
; returns the SYMBOL_* value paired with exprtype, or 0 if exprtype is not in the table.
function binop_expression_type_to_symbol
argument exprtype
local sym_ptr
local type_ptr
sym_ptr = .binop_table
:binop_expr2symb_type_loop
; the expression kind is the second byte of each 2-byte entry
type_ptr = sym_ptr + 1
if *1type_ptr == exprtype goto binop_expr2symb_type_found
sym_ptr += 2
; a zero symbol byte ends the table
if *1sym_ptr != 0 goto binop_expr2symb_type_loop
return 0
:binop_expr2symb_type_found
return *1sym_ptr
2022-01-13 16:12:28 -05:00
; Pick the type of an integer literal from its suffix.
;   suffix - one of the NUMBER_SUFFIX_* values
; UL selects unsigned long, U unsigned int, L long; anything else selects int.
function int_suffix_to_type
argument suffix
if suffix == NUMBER_SUFFIX_UL goto return_type_unsigned_long
if suffix == NUMBER_SUFFIX_U goto return_type_unsigned_int
if suffix == NUMBER_SUFFIX_L goto return_type_long
goto return_type_int
; Pick the type of a floating-point literal from its suffix.
;   suffix - one of the NUMBER_SUFFIX_* values
; F selects float; anything else selects double.
function float_suffix_to_type
argument suffix
if suffix != NUMBER_SUFFIX_F goto return_type_double
goto return_type_float
; Choose the smallest of int / long / unsigned long that can hold value,
; resorting to an unsigned type only when no signed one fits.
;   value - the value, treated as an unsigned 64-bit number
function int_value_to_type
argument value
; top bit set (negative when compared as signed): does not fit in a signed long
if value < 0 goto return_type_unsigned_long
; from here on value is in 0..0x7fffffffffffffff, so signed comparison is safe
if value > 0x7fffffff goto return_type_long
goto return_type_int
2022-01-13 20:34:13 -05:00
; print a parsed expression, for debugging.
; returns pointer to end of expression
;   expression - pointer to the encoded expression. as read here: byte 0 is the
;                expression kind, the 4 bytes at offset 4 are its type (0 = no type
;                to print), and the operands/data start at offset 8.
; if the type is nonzero it is printed first, in parentheses.
; sub-expressions are printed by recursive calls, each returning where the next
; piece of data begins.
; exits with an error message for an unrecognized expression kind.
function print_expression
argument expression
2022-01-13 16:12:28 -05:00
local c
2022-01-13 20:29:52 -05:00
local b
2022-01-13 16:12:28 -05:00
local p
p = expression + 4
2022-01-13 20:34:13 -05:00
if *4p == 0 goto print_expr_skip_type
2022-01-13 16:12:28 -05:00
; 40 = '(' and 41 = ')'
putc(40)
print_type(*4p)
putc(41)
2022-01-13 20:34:13 -05:00
:print_expr_skip_type
2022-01-13 16:12:28 -05:00
c = *1expression
if c == EXPRESSION_CONSTANT_INT goto print_expr_int
if c == EXPRESSION_CONSTANT_FLOAT goto print_expr_float
if c == EXPRESSION_STRING_LITERAL goto print_expr_str
2022-01-13 23:20:45 -05:00
if c == EXPRESSION_POST_INCREMENT goto print_post_increment
if c == EXPRESSION_POST_DECREMENT goto print_post_decrement
if c == EXPRESSION_DOT goto print_expr_dot
if c == EXPRESSION_ARROW goto print_expr_arrow
2022-01-14 10:08:02 -05:00
if c == EXPRESSION_PRE_INCREMENT goto print_pre_increment
if c == EXPRESSION_PRE_DECREMENT goto print_pre_decrement
if c == EXPRESSION_ADDRESS_OF goto print_address_of
if c == EXPRESSION_DEREFERENCE goto print_dereference
if c == EXPRESSION_UNARY_PLUS goto print_unary_plus
if c == EXPRESSION_UNARY_MINUS goto print_unary_minus
if c == EXPRESSION_BITWISE_NOT goto print_bitwise_not
if c == EXPRESSION_LOGICAL_NOT goto print_logical_not
2022-01-24 18:27:09 -05:00
if c == EXPRESSION_CAST goto print_cast
2022-01-14 10:08:02 -05:00
2022-01-13 20:29:52 -05:00
; every remaining kind must be a binary operator listed in binop_table
b = binop_expression_type_to_symbol(c)
if b != 0 goto print_expr_binop
2022-01-13 23:20:45 -05:00
puts(.str_print_bad_expr)
exit(1)
:str_print_bad_expr
string Bad expression passed to print_expression.
byte 10
byte 0
2022-01-24 18:27:09 -05:00
:print_cast
; we've already printed the type
expression += 8
expression = print_expression(expression)
return expression
2022-01-13 16:12:28 -05:00
:print_expr_int
; the 8-byte value is stored at offset 8
expression += 8
2022-01-25 17:50:00 -05:00
putn_signed(*8expression)
2022-01-13 20:29:52 -05:00
expression += 8
2022-01-13 20:34:13 -05:00
return expression
2022-01-13 16:12:28 -05:00
:print_expr_float
; the 8-byte value at offset 8 is printed with putx64
expression += 8
putx64(*8expression)
2022-01-13 20:29:52 -05:00
expression += 8
2022-01-13 20:34:13 -05:00
return expression
2022-01-13 16:12:28 -05:00
:print_expr_str
; prints 0x followed by putx32 of the 8-byte value at offset 8
; NOTE(review): presumably that value locates the string data - confirm
expression += 8
putc('0)
putc('x)
putx32(*8expression)
2022-01-13 20:29:52 -05:00
expression += 8
2022-01-13 20:34:13 -05:00
return expression
2022-01-13 20:29:52 -05:00
:print_expr_binop
; here b is the operator's symbol, as found in binop_table above
2022-01-13 20:34:13 -05:00
putc(40)
2022-01-13 20:29:52 -05:00
expression += 8
2022-01-13 20:34:13 -05:00
expression = print_expression(expression) ; 1st operand
2022-01-13 20:29:52 -05:00
b = get_keyword_str(b)
puts(b)
2022-01-13 20:34:13 -05:00
expression = print_expression(expression) ; 2nd operand
putc(41)
return expression
2022-01-13 23:20:45 -05:00
:print_expr_dot
; operand, then .str_dot, then the 8-byte number stored right after the operand
; NOTE(review): presumably that number identifies the member (e.g. its offset) - confirm
putc(40)
expression += 8
expression = print_expression(expression)
2022-01-27 10:57:18 -05:00
puts(.str_dot)
putn(*8expression)
expression += 8
2022-01-13 23:20:45 -05:00
putc(41)
return expression
:print_expr_arrow
; same layout as print_expr_dot, but .str_arrow is printed between the parts
putc(40)
expression += 8
expression = print_expression(expression)
puts(.str_arrow)
2022-01-27 10:57:18 -05:00
putn(*8expression)
expression += 8
2022-01-13 23:20:45 -05:00
putc(41)
return expression
; postfix operators: "(" operand operator ")"
:print_post_increment
putc(40)
expression += 8
expression = print_expression(expression)
putc('+)
putc('+)
putc(41)
return expression
:print_post_decrement
putc(40)
expression += 8
expression = print_expression(expression)
putc('-)
putc('-)
putc(41)
return expression
2022-01-14 10:08:02 -05:00
; prefix operators: "(" operator operand ")"
:print_pre_increment
putc(40)
putc('+)
putc('+)
expression += 8
expression = print_expression(expression)
putc(41)
return expression
:print_pre_decrement
putc(40)
putc('-)
putc('-)
expression += 8
expression = print_expression(expression)
putc(41)
return expression
:print_address_of
putc(40)
putc('&)
expression += 8
expression = print_expression(expression)
putc(41)
return expression
:print_dereference
putc(40)
putc('*)
expression += 8
expression = print_expression(expression)
putc(41)
return expression
:print_unary_plus
putc(40)
putc('+)
expression += 8
expression = print_expression(expression)
putc(41)
return expression
:print_unary_minus
putc(40)
putc('-)
expression += 8
expression = print_expression(expression)
putc(41)
return expression
:print_bitwise_not
putc(40)
putc('~)
expression += 8
expression = print_expression(expression)
putc(41)
return expression
:print_logical_not
putc(40)
putc('!)
expression += 8
expression = print_expression(expression)
putc(41)
return expression
2022-01-13 16:12:28 -05:00
; print a type, for debugging and error messages.
;   type - offset of the encoded type in the types array; the first byte is the kind
; NOTE: to make things easier, the format which this outputs isn't the same as C's, specifically we have
; *int for pointer to int and [5]int for array of 5 ints
; functions are printed as ((param,param,) -> return_type)
; exits with an error message (on file descriptor 2) for an unrecognized kind.
function print_type
argument type
local c
:print_type_top
; pointers and arrays jump back here after printing their prefix and advancing type
c = types + type
c = *1c
if c == TYPE_VOID goto print_type_void
if c == TYPE_CHAR goto print_type_char
if c == TYPE_UNSIGNED_CHAR goto print_type_unsigned_char
if c == TYPE_SHORT goto print_type_short
if c == TYPE_UNSIGNED_SHORT goto print_type_unsigned_short
if c == TYPE_INT goto print_type_int
if c == TYPE_UNSIGNED_INT goto print_type_unsigned_int
if c == TYPE_LONG goto print_type_long
if c == TYPE_UNSIGNED_LONG goto print_type_unsigned_long
if c == TYPE_FLOAT goto print_type_float
if c == TYPE_DOUBLE goto print_type_double
if c == TYPE_POINTER goto print_type_pointer
if c == TYPE_ARRAY goto print_type_array
if c == TYPE_STRUCT goto print_type_struct
2022-01-19 22:23:29 -05:00
if c == TYPE_FUNCTION goto print_type_function
2022-01-13 16:12:28 -05:00
fputs(2, .str_bad_print_type)
exit(1)
:str_bad_print_type
string Bad type passed to print_type.
byte 10
byte 0
:print_type_void
return puts(.str_void)
:print_type_char
return puts(.str_char)
:print_type_unsigned_char
return puts(.str_unsigned_char)
:print_type_short
return puts(.str_short)
:print_type_unsigned_short
return puts(.str_unsigned_short)
:print_type_int
return puts(.str_int)
:print_type_unsigned_int
return puts(.str_unsigned_int)
:print_type_long
return puts(.str_long)
:print_type_unsigned_long
return puts(.str_unsigned_long)
:print_type_float
return puts(.str_float)
:print_type_double
return puts(.str_double)
:print_type_pointer
; the pointed-to type follows the kind byte
putc('*)
type += 1
goto print_type_top
:print_type_array
; an 8-byte length follows the kind byte, then the element type
putc('[)
type += 1
c = types + type
putn(*8c) ; UNALIGNED
2022-01-13 16:12:28 -05:00
putc('])
type += 8
goto print_type_top
:print_type_struct
; only .str_struct is printed; nothing after the kind byte is read here
return puts(.str_struct)
2022-01-19 22:23:29 -05:00
:print_type_function
; after the kind byte come the parameter types, a 0 byte, then the return type
type += 1
; 40 = '('
putc(40)
2022-01-21 14:41:08 -05:00
putc(40)
2022-01-19 22:23:29 -05:00
:print_type_function_loop
c = types + type
if *1c == 0 goto print_type_function_loop_end
print_type(type)
; 44 = ',' (printed after every parameter, including the last)
putc(44)
2022-01-21 14:41:08 -05:00
; skip over the parameter type just printed
type += type_length(type)
goto print_type_function_loop
2022-01-19 22:23:29 -05:00
:print_type_function_loop_end
type += 1 ; 0 terminator
; prints ") -> " (41 = ')', 32 = space)
putc(41)
putc(32)
putc('-)
putc('>)
putc(32)
print_type(type)
2022-01-21 14:41:08 -05:00
putc(41)
2022-01-19 22:23:29 -05:00
return