commas starting to work

This commit is contained in:
pommicket 2022-01-26 16:40:11 -05:00
parent 73b6a68559
commit 7ca37ea8db
4 changed files with 182 additions and 173 deletions

View file

@ -38,8 +38,6 @@ global struct_names
; for unions, offset will always be 0. ; for unions, offset will always be 0.
global structures global structures
global structures_bytes_used global structures_bytes_used
; ident list of type IDs
global parse_type_result
#include util.b #include util.b
#include idents.b #include idents.b

View file

@ -3,4 +3,5 @@
long double d; long double d;
} (*x)(void); } (*x)(void);
*/ */
typedef int Foo; typedef int *Foo[3+342<<5];
typedef int A, *B[10];

View file

@ -30,37 +30,62 @@ function parse_tokens
local ident local ident
local type local type
local p local p
local base_type
local base_type_end
local prefix
local prefix_end
local suffix
local suffix_end
token = tokens token = tokens
:parse_tokens_loop :parse_tokens_loop
if *1token == TOKEN_EOF goto parse_tokens_eof if *1token == TOKEN_EOF goto parse_tokens_eof
if *1token == KEYWORD_TYPEDEF goto parse_typedef if *1token == KEYWORD_TYPEDEF goto parse_typedef
byte 0xcc ; not implemented die(.str_parse_tokens_ni)
:str_parse_tokens_ni
string parse_tokens not implemented.
byte 0
:parse_typedef :parse_typedef
token += 16 base_type = token + 16
type = parse_type(&token) base_type_end = type_get_base_end(base_type)
if type != 0 goto typedef_no_ident
if *1token != SYMBOL_SEMICOLON goto typedef_no_semicolon token = base_type_end
putc('*)
p = parse_type_result
:typedef_loop :typedef_loop
if *1p == 255 goto typedef_loop_end prefix = token
ident = p prefix_end = type_get_prefix_end(prefix)
p = memchr(p, 0) if *1prefix_end != TOKEN_IDENTIFIER goto typedef_no_ident
p += 1 ident = prefix_end + 8
type = *8p ident = *8ident
p += 8 suffix = prefix_end + 16
puts(ident) suffix_end = type_get_suffix_end(suffix)
putc('B)
putc(':) putc(':)
putc(32) print_tokens(base_type, base_type_end)
putc('P)
putc(':)
print_tokens(prefix, prefix_end)
putc('S)
putc(':)
print_tokens(suffix, suffix_end)
type = types_bytes_used
parse_type_declarators(prefix, prefix_end, suffix, suffix_end)
parse_base_type(base_type)
print_type(type) print_type(type)
putc(10) putc(10)
ident_list_add(typedefs, ident, type) ident_list_add(typedefs, ident, type)
token = suffix_end
if *1token == SYMBOL_SEMICOLON goto typedef_loop_end
if *1token != SYMBOL_COMMA goto bad_typedef
token += 16 ; skip comma
goto typedef_loop goto typedef_loop
:typedef_loop_end :typedef_loop_end
token += 16 ; skip semicolon token += 16 ; skip semicolon
goto parse_tokens_loop goto parse_tokens_loop
:typedef_no_ident :typedef_no_ident
@ -68,10 +93,10 @@ function parse_tokens
:str_typedef_no_ident :str_typedef_no_ident
string No identifier in typedef declaration. string No identifier in typedef declaration.
byte 0 byte 0
:typedef_no_semicolon :bad_typedef
token_error(tokens, .str_typedef_no_semicolon) token_error(tokens, .str_bad_typedef)
:str_typedef_no_semicolon :str_bad_typedef
string typedef does not end with a semicolon. string Bad typedef.
byte 0 byte 0
:parse_tokens_eof :parse_tokens_eof
return return
@ -147,25 +172,12 @@ function parse_type
; struct Thing (*things[5])(void), *something_else[3]; ; struct Thing (*things[5])(void), *something_else[3];
; BBBBBBBBBBBB PP SSSSSSSSSS P SSS ; BBBBBBBBBBBB PP SSSSSSSSSS P SSS
; Here, we call `struct Thing` the "base type". ; Here, we call `struct Thing` the "base type".
byte 0xcc
argument p_token ; return the end of the base for this type.
local typeid function type_get_base_end
local token argument token
local c local c
local p
local n
local base_type_end
local depth
local prefix
local prefix_end
local suffix
local suffix_end
local ident
token = *8p_token
prefix = token
ident_list_clear(parse_type_result)
c = *1token c = *1token
if c == KEYWORD_STRUCT goto skip_struct_union_enum if c == KEYWORD_STRUCT goto skip_struct_union_enum
if c == KEYWORD_UNION goto skip_struct_union_enum if c == KEYWORD_UNION goto skip_struct_union_enum
@ -186,11 +198,30 @@ function parse_type
goto skip_base_type_loop goto skip_base_type_loop
:skip_base_type_loop_end :skip_base_type_loop_end
return token
; find end of 1st prefix :skip_struct_union_enum
base_type_end = token token += 16
if *1token != TOKEN_IDENTIFIER goto skip_sue_no_name
token += 16 ; struct *blah*
:skip_sue_no_name
if *1token != SYMBOL_LBRACE goto skip_base_type_loop_end ; e.g. struct Something x[5];
; okay we have something like
; struct {
; int x, y;
; } test;
token_skip_to_matching_rbrace(&token)
token += 16
goto skip_base_type_loop_end
:str_bad_type
string Bad type.
byte 0
ident = 0
; return the end of this type prefix
function type_get_prefix_end
argument token
local c
:find_prefix_end_loop :find_prefix_end_loop
c = *1token c = *1token
if c == TOKEN_IDENTIFIER goto found_prefix_end if c == TOKEN_IDENTIFIER goto found_prefix_end
@ -211,19 +242,23 @@ function parse_type
token += 16 token += 16
goto find_prefix_end_loop goto find_prefix_end_loop
:found_prefix_end :found_prefix_end
prefix_end = token return token
if *1token != TOKEN_IDENTIFIER goto parse_type_no_ident ; return the end of this type suffix
token += 8 ; NOTE: you must pass in the PREFIX.
ident = *8token ; (In general, we can't find the end of the suffix without knowing the prefix.)
token += 8 ; int (*x);
:parse_type_no_ident ; ^ suffix ends here
; (int *)
; ^ suffix ends here
suffix = token function type_get_suffix_end
argument prefix
local depth
local token
local c
; find end of suffix ; find end of suffix
token = base_type_end ; start back here so we can keep track of bracket depth token = prefix
depth = 0 ; parenthesis/square bracket depth depth = 0 ; parenthesis/square bracket depth
:suffix_end_loop :suffix_end_loop
c = *1token c = *1token
@ -234,7 +269,7 @@ function parse_type
if c == SYMBOL_RPAREN goto suffix_end_decdepth if c == SYMBOL_RPAREN goto suffix_end_decdepth
if c == SYMBOL_TIMES goto suffix_end_cont if c == SYMBOL_TIMES goto suffix_end_cont
if depth == 0 goto suffix_end_found if depth == 0 goto suffix_end_found
if c == TOKEN_EOF goto pt_bad_type if c == TOKEN_EOF goto type_get_suffix_bad_type
goto suffix_end_cont goto suffix_end_cont
:suffix_end_incdepth :suffix_end_incdepth
@ -249,87 +284,45 @@ function parse_type
goto suffix_end_loop goto suffix_end_loop
:suffix_end_found :suffix_end_found
suffix_end = token return token
:type_get_suffix_bad_type
token_error(prefix, .str_bad_type)
#define TYPEDEBUG ;
TYPEDEBUG putc('B)
TYPEDEBUG putc('a)
TYPEDEBUG putc('s)
TYPEDEBUG putc(':)
TYPEDEBUG putc(32)
TYPEDEBUG print_tokens(*8p_token, base_type_end)
TYPEDEBUG putc('P)
TYPEDEBUG putc('r)
TYPEDEBUG putc('e)
TYPEDEBUG putc(':)
TYPEDEBUG putc(32)
TYPEDEBUG print_tokens(prefix, prefix_end)
TYPEDEBUG putc('S)
TYPEDEBUG putc('u)
TYPEDEBUG putc('f)
TYPEDEBUG putc(':)
TYPEDEBUG putc(32)
TYPEDEBUG print_tokens(suffix, suffix_end)
typeid = types_bytes_used ; writes to *(types + types_bytes_used), and updates types_bytes_used
p = types + typeid function parse_type_declarators
p = parse_type_given_base_prefix_suffix(*8p_token, prefix, prefix_end, suffix, suffix_end, p)
if ident == 0 goto type_no_ident
ident_list_add(parse_type_result, typeid)
typeid = 0
:type_no_ident
*8p_token = suffix_end
types_bytes_used = p - types
return typeid
:skip_struct_union_enum
token += 16
if *1token != TOKEN_IDENTIFIER goto skip_sue_no_name
token += 16 ; struct *blah*
:skip_sue_no_name
if *1token != SYMBOL_LBRACE goto skip_base_type_loop_end ; e.g. struct Something x[5];
; okay we have something like
; struct {
; int x, y;
; } test;
token_skip_to_matching_rbrace(&token)
token += 16
goto skip_base_type_loop_end
:pt_bad_type
token_error(*8p_token, .str_bad_type)
:str_bad_type
string Bad type.
byte 0
function parse_type_given_base_prefix_suffix
argument base_type
argument prefix argument prefix
argument prefix_end argument prefix_end
argument suffix argument suffix
argument suffix_end argument suffix_end
argument out
local p local p
local expr local expr
local n local n
local c local c
local depth local depth
local out
; main loop for parsing types ; main loop for parsing types
:parse_type_loop :type_declarators_loop
p = prefix_end - 16 p = prefix_end - 16
if *1suffix == SYMBOL_LSQUARE goto parse_array_type if *1suffix == SYMBOL_LSQUARE goto parse_array_type
if *1suffix == SYMBOL_LPAREN goto parse_function_type if *1suffix == SYMBOL_LPAREN goto parse_function_type
if *1p == SYMBOL_TIMES goto parse_pointer_type if *1p == SYMBOL_TIMES goto parse_pointer_type
if suffix == suffix_end goto parse_base_type if suffix == suffix_end goto type_declarators_loop_end
if *1suffix == SYMBOL_RPAREN goto parse_type_remove_parentheses if *1suffix == SYMBOL_RPAREN goto parse_type_remove_parentheses
goto bps_bad_type goto parse_typedecls_bad_type
:parse_pointer_type :parse_pointer_type
out = types + types_bytes_used
*1out = TYPE_POINTER *1out = TYPE_POINTER
out += 1 types_bytes_used += 1
prefix_end = p prefix_end = p
goto parse_type_loop goto type_declarators_loop
:parse_array_type :parse_array_type
out = types + types_bytes_used
*1out = TYPE_ARRAY
types_bytes_used += 1
local prev_types local prev_types
local prev_types_bytes_used local prev_types_bytes_used
; little hack to avoid screwing up types like double[sizeof(int)] ; little hack to avoid screwing up types like double[sizeof(int)]
@ -339,74 +332,74 @@ function parse_type_given_base_prefix_suffix
types = malloc(4000) types = malloc(4000)
types_init(types, &types_bytes_used) types_init(types, &types_bytes_used)
expr = malloc(4000) expr = malloc(4000)
*1out = TYPE_ARRAY
out += 1
p = suffix p = suffix
token_skip_to_matching_rsquare(&p) token_skip_to_matching_rsquare(&p)
suffix += 16 ; skip [ suffix += 16 ; skip [
parse_expression(suffix, p, expr) parse_expression(suffix, p, expr)
;print_expression(expr) ;print_expression(expr)
;putc(10) ;putc(10)
evaluate_constant_expression(base_type, expr, &n) evaluate_constant_expression(prefix, expr, &n)
if n < 0 goto bad_array_size if n < 0 goto bad_array_size
*8out = n
out += 8
free(expr) free(expr)
free(types) free(types)
types = prev_types types = prev_types
types_bytes_used = prev_types_bytes_used types_bytes_used = prev_types_bytes_used
out = types + types_bytes_used
*8out = n
types_bytes_used += 8
suffix = p + 16 suffix = p + 16
goto parse_type_loop goto type_declarators_loop
:bad_array_size :bad_array_size
token_error(base_type, .str_bad_array_size) token_error(suffix, .str_bad_array_size)
:str_bad_array_size :str_bad_array_size
string Very large or negative array size. string Very large or negative array size.
byte 0 byte 0
:parse_function_type :parse_function_type
local prev_parse_type_result
prev_parse_type_result = parse_type_result
parse_type_result = ident_list_create(16000)
p = suffix + 16 p = suffix + 16
out = types + types_bytes_used
*1out = TYPE_FUNCTION *1out = TYPE_FUNCTION
out += 1 types_bytes_used += 1
:function_type_loop :function_type_loop
if *1p == SYMBOL_RPAREN goto function_type_loop_end ; only needed for 1st iteration byte 0xcc ; @TODO
n = parse_type(&p) out = types + types_bytes_used
if n != 0 goto fparam_have_type
c = ident_list_len(parse_type_result)
if c != 1 goto bps_bad_type
n = ident_list_value_at_index(parse_type_result, 0)
:fparam_have_type
n += type_length(n)
out = types + n
if *1p == SYMBOL_RPAREN goto function_type_loop_end
if *1p != SYMBOL_COMMA goto bps_bad_type
p += 16
goto function_type_loop
:function_type_loop_end
*1out = 0 *1out = 0
out += 1 types_bytes_used += 1
suffix = p + 16 suffix = p + 16
ident_list_free(parse_type_result) goto type_declarators_loop
parse_type_result = prev_parse_type_result
goto parse_type_loop
:parse_type_remove_parentheses :parse_type_remove_parentheses
if *1p != SYMBOL_LPAREN goto bps_bad_type if *1p != SYMBOL_LPAREN goto parse_typedecls_bad_type
prefix_end = p prefix_end = p
suffix += 16 suffix += 16
goto parse_type_loop goto type_declarators_loop
:parse_base_type :type_declarators_loop_end
if *1prefix == TOKEN_IDENTIFIER goto base_type_typedef return 0
if *1prefix == KEYWORD_STRUCT goto base_type_struct :parse_typedecls_bad_type
if *1prefix == KEYWORD_UNION goto base_type_union token_error(prefix, .str_bad_type)
if *1prefix == KEYWORD_ENUM goto base_type_enum
if *1prefix == KEYWORD_FLOAT goto base_type_float ; writes to *(types + types_bytes_used), and updates types_bytes_used (no return value)
if *1prefix == KEYWORD_VOID goto base_type_void function parse_base_type
argument base_type
local out
local flags
local p
local c
local depth
local expr
out = types + types_bytes_used
c = *1base_type
if c == TOKEN_IDENTIFIER goto base_type_typedef
if c == KEYWORD_STRUCT goto base_type_struct
if c == KEYWORD_UNION goto base_type_union
if c == KEYWORD_ENUM goto base_type_enum
if c == KEYWORD_FLOAT goto base_type_float
if c == KEYWORD_VOID goto base_type_void
; "normal" type like int, unsigned char, etc. ; "normal" type like int, unsigned char, etc.
local flags
; annoyingly, all of these are equivalent to `unsigned long`: ; annoyingly, all of these are equivalent to `unsigned long`:
; unsigned long int ; unsigned long int
; long unsigned int ; long unsigned int
@ -420,7 +413,7 @@ function parse_type_given_base_prefix_suffix
#define PARSETYPE_FLAG_LONG 16 #define PARSETYPE_FLAG_LONG 16
#define PARSETYPE_FLAG_DOUBLE 32 #define PARSETYPE_FLAG_DOUBLE 32
flags = 0 flags = 0
p = prefix p = base_type
:base_type_normal_loop :base_type_normal_loop
c = *1p c = *1p
p += 16 p += 16
@ -467,6 +460,8 @@ function parse_type_given_base_prefix_suffix
if flags == 32 goto base_type_double ; `double` if flags == 32 goto base_type_double ; `double`
if flags == 48 goto base_type_double ; `long double` (we use the same type for double and long double) if flags == 48 goto base_type_double ; `long double` (we use the same type for double and long double)
goto bad_base_type
:base_type_char :base_type_char
*1out = TYPE_CHAR *1out = TYPE_CHAR
out += 1 out += 1
@ -505,10 +500,12 @@ function parse_type_given_base_prefix_suffix
goto base_type_done goto base_type_done
:base_type_done :base_type_done
return out types_bytes_used = out - types
return 0
:base_type_struct :base_type_struct
:base_type_union :base_type_union
p = prefix + 16 p = base_type + 16
if *1p != TOKEN_IDENTIFIER goto base_type_struct_definition if *1p != TOKEN_IDENTIFIER goto base_type_struct_definition
p += 16 p += 16
if *1p == SYMBOL_LBRACE goto base_type_struct_definition if *1p == SYMBOL_LBRACE goto base_type_struct_definition
@ -527,7 +524,7 @@ function parse_type_given_base_prefix_suffix
out += 1 out += 1
goto base_type_done goto base_type_done
:base_type_struct_definition :base_type_struct_definition
if *1p != SYMBOL_LBRACE goto bps_bad_type if *1p != SYMBOL_LBRACE goto bad_base_type
byte 0xcc ; @TODO byte 0xcc ; @TODO
:base_type_enum :base_type_enum
local q local q
@ -536,9 +533,9 @@ function parse_type_given_base_prefix_suffix
out += 1 out += 1
types_bytes_used = out - types types_bytes_used = out - types
p = prefix + 16 p = base_type + 16
if *1p == SYMBOL_LBRACE goto enum_definition if *1p == SYMBOL_LBRACE goto enum_definition
if *1p != TOKEN_IDENTIFIER goto bps_bad_type ; e.g. enum int x; if *1p != TOKEN_IDENTIFIER goto bad_base_type ; e.g. enum int x;
p += 16 p += 16
if *1p == SYMBOL_LBRACE goto enum_definition if *1p == SYMBOL_LBRACE goto enum_definition
goto base_type_done ; just using an enum type, not defining it. goto base_type_done ; just using an enum type, not defining it.
@ -571,7 +568,7 @@ function parse_type_given_base_prefix_suffix
if *1q == SYMBOL_COMMA goto enum_comma_loop_end if *1q == SYMBOL_COMMA goto enum_comma_loop_end
if *1q == SYMBOL_RBRACE goto enum_comma_loop_end if *1q == SYMBOL_RBRACE goto enum_comma_loop_end
:enum_comma_deep :enum_comma_deep
if *1q == TOKEN_EOF goto bps_bad_type if *1q == TOKEN_EOF goto bad_base_type
c = *1q c = *1q
q += 16 q += 16
if c == SYMBOL_LPAREN goto enum_comma_incdepth if c == SYMBOL_LPAREN goto enum_comma_incdepth
@ -625,17 +622,21 @@ function parse_type_given_base_prefix_suffix
out += 1 out += 1
goto base_type_done goto base_type_done
:base_type_typedef :base_type_typedef
p = prefix + 8 p = base_type + 8
c = ident_list_lookup(typedefs, *8p) c = ident_list_lookup(typedefs, *8p)
if c == 0 goto bps_bad_type if c == 0 goto bad_base_type
n = type_length(c) local len
len = type_length(c)
c += types c += types
memcpy(out, c, n) memcpy(out, c, len)
out += n out += len
goto base_type_done goto base_type_done
:bps_bad_type :bad_base_type
token_error(base_type, .str_bad_type) token_error(base_type, .str_bad_base_type)
:str_bad_base_type
string Bad base type.
byte 0
; how many bytes does it take to encode this type? ; how many bytes does it take to encode this type?
function type_length function type_length
@ -1258,7 +1259,11 @@ function type_sizeof
if c == TYPE_POINTER goto return_8 if c == TYPE_POINTER goto return_8
if c == TYPE_FUNCTION goto return_8 if c == TYPE_FUNCTION goto return_8
if c == TYPE_ARRAY goto sizeof_array if c == TYPE_ARRAY goto sizeof_array
byte 0xcc ; @TODO fputs(2, .str_sizeof_ni) ; @TODO
exit(1)
:str_sizeof_ni
string type_sizeof for this type not implemented.
byte 0
:sizeof_array :sizeof_array
local n local n

View file

@ -134,6 +134,11 @@ function file_error
byte 32 byte 32
byte 0 byte 0
; report a fatal error: pass message to fputs with 2 as the first argument
; (presumably file descriptor 2, i.e. standard error -- confirm against fputs),
; then call exit(1).
; message is passed straight through to fputs; callers in this commit pass
; the address of a string terminated by `byte 0`.
; NOTE(review): there is no `return` here, so this relies on exit not returning -- confirm.
function die
argument message
fputs(2, message)
exit(1)
function malloc function malloc
argument size argument size
local total_size local total_size