commas starting to work

2022-01-26 16:40:11 -05:00 · 2022-01-26 16:40:11 -05:00 · 7ca37ea8db
commit 7ca37ea8db
parent 73b6a68559
4 changed files with 182 additions and 173 deletions
--- a/05/main.b
+++ b/05/main.b
@ -38,8 +38,6 @@ global struct_names
 ; for unions, offset will always be 0.
 global structures
 global structures_bytes_used
 ; ident list of type IDs
 global parse_type_result
 #include util.b
 #include idents.b
--- a/05/main.c
+++ b/05/main.c
@ -3,4 +3,5 @@
 	long double d;
 } (*x)(void);
 */
-typedef int Foo;
+typedef int *Foo[3+342<<5];
 typedef int A, *B[10];
--- a/05/parse.b
+++ b/05/parse.b
@ -30,37 +30,62 @@ function parse_tokens
 	local ident
 	local type
 	local p
 	local base_type
 	local base_type_end
 	local prefix
 	local prefix_end
 	local suffix
 	local suffix_end
 	token = tokens
 	:parse_tokens_loop
 		if *1token == TOKEN_EOF goto parse_tokens_eof
 		if *1token == KEYWORD_TYPEDEF goto parse_typedef
-		byte 0xcc ; not implemented
+		die(.str_parse_tokens_ni)
-		
+		:str_parse_tokens_ni
 			string parse_tokens not implemented.
 			byte 0
 		:parse_typedef
-			token += 16
+			base_type = token + 16
-			type = parse_type(&token)
+			base_type_end = type_get_base_end(base_type)
-			if type != 0 goto typedef_no_ident
+			
-			if *1token != SYMBOL_SEMICOLON goto typedef_no_semicolon
+			token = base_type_end
 			putc('*)
 			p = parse_type_result
 			:typedef_loop
-				if *1p == 255 goto typedef_loop_end
+				prefix = token
-				ident = p
+				prefix_end = type_get_prefix_end(prefix)
-				p = memchr(p, 0)
+				if *1prefix_end != TOKEN_IDENTIFIER goto typedef_no_ident
-				p += 1
+				ident = prefix_end + 8
-				type = *8p
+				ident = *8ident
-				p += 8
+				suffix = prefix_end + 16
-				puts(ident)
+				suffix_end = type_get_suffix_end(suffix)
 				putc('B)
 				putc(':)
-				putc(32)
+				print_tokens(base_type, base_type_end)
 				putc('P)
 				putc(':)
 				print_tokens(prefix, prefix_end)
 				putc('S)
 				putc(':)
 				print_tokens(suffix, suffix_end)
 				type = types_bytes_used
 				parse_type_declarators(prefix, prefix_end, suffix, suffix_end)
 				parse_base_type(base_type)
 				print_type(type)
 				putc(10)
 				ident_list_add(typedefs, ident, type)
 				token = suffix_end
 				if *1token == SYMBOL_SEMICOLON goto typedef_loop_end
 				if *1token != SYMBOL_COMMA goto bad_typedef
 				token += 16 ; skip comma
 				goto typedef_loop
 			:typedef_loop_end
 			token += 16 ; skip semicolon
 			goto parse_tokens_loop
 		:typedef_no_ident
@ -68,10 +93,10 @@ function parse_tokens
 		:str_typedef_no_ident
 			string No identifier in typedef declaration.
 			byte 0
-		:typedef_no_semicolon
+		:bad_typedef
-			token_error(tokens, .str_typedef_no_semicolon)
+			token_error(tokens, .str_bad_typedef)
-		:str_typedef_no_semicolon
+		:str_bad_typedef
-			string typedef does not end with a semicolon.
+			string Bad typedef.
 			byte 0
 	:parse_tokens_eof
 	return
@ -147,25 +172,12 @@ function parse_type
 	;     struct Thing (*things[5])(void), *something_else[3];
 	;     BBBBBBBBBBBB PP      SSSSSSSSSS  P              SSS
 	; Here, we call `struct Thing` the "base type".
 	byte 0xcc
-	argument p_token
+; return the end of the base for this type.
-	local typeid
+function type_get_base_end
-	local token
+	argument token
 	local c
 	local p
 	local n
 	local base_type_end
 	local depth
 	local prefix
 	local prefix_end
 	local suffix
 	local suffix_end
 	local ident
 	token = *8p_token
 	prefix = token
 	ident_list_clear(parse_type_result)
 	c = *1token
 	if c == KEYWORD_STRUCT goto skip_struct_union_enum
 	if c == KEYWORD_UNION goto skip_struct_union_enum
@ -186,11 +198,30 @@ function parse_type
 			goto skip_base_type_loop
 	:skip_base_type_loop_end
 	return token
-	; find end of 1st prefix
+	:skip_struct_union_enum
-	base_type_end = token
+		token += 16
 		if *1token != TOKEN_IDENTIFIER goto skip_sue_no_name
 			token += 16 ; struct *blah*
 		:skip_sue_no_name
 		if *1token != SYMBOL_LBRACE goto skip_base_type_loop_end ; e.g. struct Something x[5];
 		; okay we have something like
 		;   struct {
 		;       int x, y;
 		;   } test;
 		token_skip_to_matching_rbrace(&token)
 		token += 16
 		goto skip_base_type_loop_end
 	:str_bad_type
 		string Bad type.
 		byte 0
-	ident = 0
+
 ; return the end of this type prefix
 function type_get_prefix_end
 	argument token
 	local c
 	:find_prefix_end_loop
 		c = *1token
 		if c == TOKEN_IDENTIFIER goto found_prefix_end
@ -211,19 +242,23 @@ function parse_type
 			token += 16
 			goto find_prefix_end_loop
 	:found_prefix_end
-	prefix_end = token
+	return token
-	if *1token != TOKEN_IDENTIFIER goto parse_type_no_ident
+; return the end of this type suffix
-	token += 8
+; NOTE: you must pass in the PREFIX.
-	ident = *8token
+; (In general, we can't find the end of the suffix without knowing the prefix.)
-	token += 8
+;    int (*x);
-	:parse_type_no_ident
+;            ^ suffix ends here
-	
+;    (int *)
-	
+;          ^ suffix ends here
-	suffix = token
+function type_get_suffix_end
 	argument prefix
 	local depth
 	local token
 	local c
 	; find end of suffix
-	token = base_type_end ; start back here so we can keep track of bracket depth
+	token = prefix
 	depth = 0 ; parenthesis/square bracket depth
 	:suffix_end_loop
 		c = *1token
@ -234,7 +269,7 @@ function parse_type
 		if c == SYMBOL_RPAREN goto suffix_end_decdepth
 		if c == SYMBOL_TIMES goto suffix_end_cont
 		if depth == 0 goto suffix_end_found
-		if c == TOKEN_EOF goto pt_bad_type
+		if c == TOKEN_EOF goto type_get_suffix_bad_type
 		goto suffix_end_cont
 		:suffix_end_incdepth
@ -249,87 +284,45 @@ function parse_type
 			goto suffix_end_loop
 	:suffix_end_found
-	suffix_end = token
+	return token
 	:type_get_suffix_bad_type
 		token_error(prefix, .str_bad_type)
 #define TYPEDEBUG ;
 	TYPEDEBUG	putc('B)
 	TYPEDEBUG	putc('a)
 	TYPEDEBUG	putc('s)
 	TYPEDEBUG	putc(':)
 	TYPEDEBUG	putc(32)
 	TYPEDEBUG	print_tokens(*8p_token, base_type_end)
 	TYPEDEBUG	putc('P)
 	TYPEDEBUG	putc('r)
 	TYPEDEBUG	putc('e)
 	TYPEDEBUG	putc(':)
 	TYPEDEBUG	putc(32)
 	TYPEDEBUG	print_tokens(prefix, prefix_end)
 	TYPEDEBUG	putc('S)
 	TYPEDEBUG	putc('u)
 	TYPEDEBUG	putc('f)
 	TYPEDEBUG	putc(':)
 	TYPEDEBUG	putc(32)
 	TYPEDEBUG	print_tokens(suffix, suffix_end)
-	typeid = types_bytes_used
+; writes to *(types + types_bytes_used), and updates types_bytes_used
-	p = types + typeid
+function parse_type_declarators
 	p = parse_type_given_base_prefix_suffix(*8p_token, prefix, prefix_end, suffix, suffix_end, p)
 	if ident == 0 goto type_no_ident
 		ident_list_add(parse_type_result, typeid)
 		typeid = 0
 	:type_no_ident
 	*8p_token = suffix_end
 	types_bytes_used = p - types
 	return typeid
 	:skip_struct_union_enum
 		token += 16
 		if *1token != TOKEN_IDENTIFIER goto skip_sue_no_name
 			token += 16 ; struct *blah*
 		:skip_sue_no_name
 		if *1token != SYMBOL_LBRACE goto skip_base_type_loop_end ; e.g. struct Something x[5];
 		; okay we have something like
 		;   struct {
 		;       int x, y;
 		;   } test;
 		token_skip_to_matching_rbrace(&token)
 		token += 16
 		goto skip_base_type_loop_end
 	:pt_bad_type
 		token_error(*8p_token, .str_bad_type)
 	:str_bad_type
 		string Bad type.
 		byte 0
 function parse_type_given_base_prefix_suffix
 	argument base_type
 	argument prefix
 	argument prefix_end
 	argument suffix
 	argument suffix_end
 	argument out
 	local p
 	local expr
 	local n
 	local c
 	local depth
 	local out
 	; main loop for parsing types
-	:parse_type_loop
+	:type_declarators_loop
 		p = prefix_end - 16
 		if *1suffix == SYMBOL_LSQUARE goto parse_array_type
 		if *1suffix == SYMBOL_LPAREN goto parse_function_type
 		if *1p == SYMBOL_TIMES goto parse_pointer_type
-		if suffix == suffix_end goto parse_base_type
+		if suffix == suffix_end goto type_declarators_loop_end
 		if *1suffix == SYMBOL_RPAREN goto parse_type_remove_parentheses
-		goto bps_bad_type
+		goto parse_typedecls_bad_type
 		:parse_pointer_type
 			out = types + types_bytes_used
 			*1out = TYPE_POINTER
-			out += 1
+			types_bytes_used += 1
 			prefix_end = p
-			goto parse_type_loop
+			goto type_declarators_loop
 		:parse_array_type
 			out = types + types_bytes_used
 			*1out = TYPE_ARRAY
 			types_bytes_used += 1
 			local prev_types
 			local prev_types_bytes_used
 			; little hack to avoid screwing up types like  double[sizeof(int)]
@ -339,74 +332,74 @@ function parse_type_given_base_prefix_suffix
 			types = malloc(4000)
 			types_init(types, &types_bytes_used)
 			expr = malloc(4000)
 			*1out = TYPE_ARRAY
 			out += 1
 			p = suffix
 			token_skip_to_matching_rsquare(&p)
 			suffix += 16 ; skip [
 			parse_expression(suffix, p, expr)
 			;print_expression(expr)
 			;putc(10)
-			evaluate_constant_expression(base_type, expr, &n)
+			evaluate_constant_expression(prefix, expr, &n)
 			if n < 0 goto bad_array_size
 			*8out = n
 			out += 8
 			free(expr)
 			free(types)
 			types = prev_types
 			types_bytes_used = prev_types_bytes_used
 			out = types + types_bytes_used
 			*8out = n
 			types_bytes_used += 8
 			suffix = p + 16
-			goto parse_type_loop
+			goto type_declarators_loop
 			:bad_array_size
-				token_error(base_type, .str_bad_array_size)
+				token_error(suffix, .str_bad_array_size)
 			:str_bad_array_size
 				string Very large or negative array size.
 				byte 0 
 		:parse_function_type
 			local prev_parse_type_result
 			prev_parse_type_result = parse_type_result
 			parse_type_result = ident_list_create(16000)
 			p = suffix + 16
 			out = types + types_bytes_used
 			*1out = TYPE_FUNCTION
-			out += 1
+			types_bytes_used += 1
 			:function_type_loop
-				if *1p == SYMBOL_RPAREN goto function_type_loop_end ; only needed for 1st iteration
+				byte 0xcc ; @TODO
-				n = parse_type(&p)
+			out = types + types_bytes_used
 				if n != 0 goto fparam_have_type
 				c = ident_list_len(parse_type_result)
 				if c != 1 goto bps_bad_type
 				n = ident_list_value_at_index(parse_type_result, 0)
 				:fparam_have_type
 				n += type_length(n)
 				out = types + n
 				if *1p == SYMBOL_RPAREN goto function_type_loop_end
 				if *1p != SYMBOL_COMMA goto bps_bad_type
 				p += 16
 				goto function_type_loop
 			:function_type_loop_end
 			*1out = 0
-			out += 1
+			types_bytes_used += 1
 			suffix = p + 16
-			ident_list_free(parse_type_result)
+			goto type_declarators_loop
 			parse_type_result = prev_parse_type_result
 			goto parse_type_loop
 		:parse_type_remove_parentheses
-			if *1p != SYMBOL_LPAREN goto bps_bad_type
+			if *1p != SYMBOL_LPAREN goto parse_typedecls_bad_type
 			prefix_end = p
 			suffix += 16
-			goto parse_type_loop
+			goto type_declarators_loop
-	:parse_base_type
+	:type_declarators_loop_end
-	if *1prefix == TOKEN_IDENTIFIER goto base_type_typedef
+	return 0
-	if *1prefix == KEYWORD_STRUCT goto base_type_struct
+	:parse_typedecls_bad_type
-	if *1prefix == KEYWORD_UNION goto base_type_union
+		token_error(prefix, .str_bad_type)
-	if *1prefix == KEYWORD_ENUM goto base_type_enum
+
-	if *1prefix == KEYWORD_FLOAT goto base_type_float
+; writes to *(types + types_bytes_used), and updates types_bytes_used (no return value)
-	if *1prefix == KEYWORD_VOID goto base_type_void
+function parse_base_type
 	argument base_type
 	local out
 	local flags
 	local p
 	local c
 	local depth
 	local expr
 	out = types + types_bytes_used
 	c = *1base_type
 	if c == TOKEN_IDENTIFIER goto base_type_typedef
 	if c == KEYWORD_STRUCT goto base_type_struct
 	if c == KEYWORD_UNION goto base_type_union
 	if c == KEYWORD_ENUM goto base_type_enum
 	if c == KEYWORD_FLOAT goto base_type_float
 	if c == KEYWORD_VOID goto base_type_void
 	; "normal" type like int, unsigned char, etc.
 	local flags
 	; annoyingly, all of these are equivalent to `unsigned long`:
 	;    unsigned long int
 	;    long unsigned int
@ -420,7 +413,7 @@ function parse_type_given_base_prefix_suffix
 #define PARSETYPE_FLAG_LONG 16
 #define PARSETYPE_FLAG_DOUBLE 32
 	flags = 0
-	p = prefix
+	p = base_type
 	:base_type_normal_loop
 		c = *1p
 		p += 16
@ -467,6 +460,8 @@ function parse_type_given_base_prefix_suffix
 	if flags == 32 goto base_type_double ; `double`
 	if flags == 48 goto base_type_double ; `long double` (we use the same type for double and long double)
 	goto bad_base_type
 	:base_type_char
 		*1out = TYPE_CHAR
 		out += 1
@ -505,10 +500,12 @@ function parse_type_given_base_prefix_suffix
 		goto base_type_done
 	:base_type_done
-	return out
+	types_bytes_used = out - types
 	return 0
 	:base_type_struct
 	:base_type_union
-		p = prefix + 16
+		p = base_type + 16
 		if *1p != TOKEN_IDENTIFIER goto base_type_struct_definition
 		p += 16
 		if *1p == SYMBOL_LBRACE goto base_type_struct_definition
@ -527,7 +524,7 @@ function parse_type_given_base_prefix_suffix
 			out += 1
 			goto base_type_done
 		:base_type_struct_definition
-			if *1p != SYMBOL_LBRACE goto bps_bad_type
+			if *1p != SYMBOL_LBRACE goto bad_base_type
 			byte 0xcc ; @TODO
 	:base_type_enum
 		local q
@ -536,9 +533,9 @@ function parse_type_given_base_prefix_suffix
 		out += 1
 		types_bytes_used = out - types
-		p = prefix + 16
+		p = base_type + 16
 		if *1p == SYMBOL_LBRACE goto enum_definition
-		if *1p != TOKEN_IDENTIFIER goto bps_bad_type ; e.g. enum int x;
+		if *1p != TOKEN_IDENTIFIER goto bad_base_type ; e.g. enum int x;
 		p += 16
 		if *1p == SYMBOL_LBRACE goto enum_definition
 		goto base_type_done ; just using an enum type, not defining it.
@ -571,7 +568,7 @@ function parse_type_given_base_prefix_suffix
 					if *1q == SYMBOL_COMMA goto enum_comma_loop_end
 					if *1q == SYMBOL_RBRACE goto enum_comma_loop_end
 					:enum_comma_deep
-					if *1q == TOKEN_EOF goto bps_bad_type
+					if *1q == TOKEN_EOF goto bad_base_type
 					c = *1q
 					q += 16
 					if c == SYMBOL_LPAREN goto enum_comma_incdepth
@ -625,17 +622,21 @@ function parse_type_given_base_prefix_suffix
 		out += 1
 		goto base_type_done	
 	:base_type_typedef
-		p = prefix + 8
+		p = base_type + 8
 		c = ident_list_lookup(typedefs, *8p)
-		if c == 0 goto bps_bad_type
+		if c == 0 goto bad_base_type
-		n = type_length(c)
+		local len
 		len = type_length(c)
 		c += types
-		memcpy(out, c, n)
+		memcpy(out, c, len)
-		out += n
+		out += len
 		goto base_type_done
-	:bps_bad_type
+	:bad_base_type
-		token_error(base_type, .str_bad_type)
+		token_error(base_type, .str_bad_base_type)
 	:str_bad_base_type
 		string Bad base type.
 		byte 0
 ; how many bytes does it take to encode this type?
 function type_length
@ -1258,7 +1259,11 @@ function type_sizeof
 	if c == TYPE_POINTER goto return_8
 	if c == TYPE_FUNCTION goto return_8
 	if c == TYPE_ARRAY goto sizeof_array
-	byte 0xcc ;  @TODO
+	fputs(2, .str_sizeof_ni) ;  @TODO
 	exit(1)
 	:str_sizeof_ni
 		string type_sizeof for this type not implemented.
 		byte 0
 	:sizeof_array
 		local n
--- a/05/util.b
+++ b/05/util.b
@ -134,6 +134,11 @@ function file_error
 	byte 32
 	byte 0
 function die
 	argument message
 	fputs(2, message)
 	exit(1)
 function malloc
 	argument size
 	local total_size