start parsing statements (not a lot yet)
This commit is contained in:
parent
fd02968c23
commit
d1167f03d0
4 changed files with 260 additions and 65 deletions
|
@ -230,6 +230,52 @@
|
||||||
; types will be initialized (in main) so that this refers to the type char*
|
; types will be initialized (in main) so that this refers to the type char*
|
||||||
#define TYPE_POINTER_TO_CHAR 20
|
#define TYPE_POINTER_TO_CHAR 20
|
||||||
|
|
||||||
|
; STATEMENTS
|
||||||
|
; In C, note that `if', `while', etc. always have a single statement as their body:
|
||||||
|
; if (x) { y; z; w; }
|
||||||
|
; here {y; z; w;} is a single `compound' statement containing three statements.
|
||||||
|
; our statements don't directly correspond to the C89 standard's notion of statements; in particular,
|
||||||
|
; labels count as separate statements and declarations count as statements.
|
||||||
|
; each statement is stored as exactly 40 bytes
|
||||||
|
; uchar type
|
||||||
|
; uchar padding
|
||||||
|
; ushort file
|
||||||
|
; uint line
|
||||||
|
; ulong data1
|
||||||
|
; ulong data2
|
||||||
|
; ulong data3
|
||||||
|
; ulong data4
|
||||||
|
; a type of 0 indicates the end of the block.
|
||||||
|
; data layout for particular statements:
|
||||||
|
; - STATEMENT_EXPRESSION - data1 is a pointer to expression data; data2,3,4 are unused
|
||||||
|
; - STATEMENT_LOCAL_DECLARATION - declaring a local variable (automatic/"register" storage duration), data1 = total bytes used by all local variables so far in this function including this one; data2,3,4 unused
|
||||||
|
; - STATEMENT_LABEL - data1 is a pointer to the name of the label; data2,3,4 are unused
|
||||||
|
; - STATEMENT_BLOCK - data1 is a pointer to an array of statements; data2,3,4 are unused
|
||||||
|
; - STATEMENT_IF - data1 is a pointer to the condition, data2 is a pointer to the `if' branch statement, data3 is a pointer to the `else' branch statement, or 0 if there is none; data4 is unused
|
||||||
|
; - STATEMENT_SWITCH - data1 is a pointer to the expression, data2 is a pointer to the body statement; data3,4 are unused
|
||||||
|
; - STATEMENT_WHILE - data1 is a pointer to the condition, data2 is a pointer to the body statement; data3,4 are unused
|
||||||
|
; - STATEMENT_DO - data1 is a pointer to the body statement, data2 is a pointer to the condition; data3,4 are unused
|
||||||
|
; - STATEMENT_FOR - data1,2,3 are pointers to the first, second, and third expressions inside parentheses, data4 is a pointer to the body statement
|
||||||
|
; - STATEMENT_GOTO - data1 is a pointer to the name of the label; data2,3,4 are unused
|
||||||
|
; - STATEMENT_CONTINUE - data1,2,3,4 are unused
|
||||||
|
; - STATEMENT_BREAK - data1,2,3,4 are unused
|
||||||
|
; - STATEMENT_RETURN - data1 is a pointer to the expression, or 0 if there is none; data2,3,4 are unused
|
||||||
|
#define STATEMENT_EXPRESSION 1
|
||||||
|
#define STATEMENT_LOCAL_DECLARATION 2
|
||||||
|
#define STATEMENT_LABEL 3
|
||||||
|
#define STATEMENT_BLOCK 4
|
||||||
|
#define STATEMENT_IF 5
|
||||||
|
#define STATEMENT_SWITCH 6
|
||||||
|
#define STATEMENT_WHILE 7
|
||||||
|
#define STATEMENT_DO 8
|
||||||
|
#define STATEMENT_FOR 9
|
||||||
|
#define STATEMENT_GOTO 0xa
|
||||||
|
#define STATEMENT_CONTINUE 0xb
|
||||||
|
#define STATEMENT_BREAK 0xc
|
||||||
|
#define STATEMENT_RETURN 0xd
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
:keyword_table
|
:keyword_table
|
||||||
byte SYMBOL_SEMICOLON
|
byte SYMBOL_SEMICOLON
|
||||||
byte 59
|
byte 59
|
||||||
|
|
34
05/main.b
34
05/main.b
|
@ -42,6 +42,16 @@ global output_file_data
|
||||||
; ident list of global variables. each one is stored as
|
; ident list of global variables. each one is stored as
|
||||||
; (type << 32) | address
|
; (type << 32) | address
|
||||||
global global_variables
|
global global_variables
|
||||||
|
; ident list of functions. each entry is a pointer to a single statement - which should always be a STATEMENT_BLOCK
|
||||||
|
global function_statements
|
||||||
|
; statement_datas[0] = pointer to statement data for block-nesting depth 0 (i.e. function bodies)
|
||||||
|
; statement_datas[1] = pointer to statement data for block-nesting depth 1 (blocks inside functions)
|
||||||
|
; statement_datas[2] = pointer to statement data for block-nesting depth 2 (blocks inside blocks inside functions)
|
||||||
|
; etc. up to statement_datas[15] "* 15 nesting levels of compound statements, iteration control structures, and selection control structures" C89 § 2.2.4.1
|
||||||
|
; these have to be separated for reasons™
|
||||||
|
global statement_datas
|
||||||
|
global statement_datas_ends
|
||||||
|
global parse_stmt_depth
|
||||||
|
|
||||||
#include util.b
|
#include util.b
|
||||||
#include idents.b
|
#include idents.b
|
||||||
|
@ -154,15 +164,32 @@ function main
|
||||||
local tokens
|
local tokens
|
||||||
local ast
|
local ast
|
||||||
local p
|
local p
|
||||||
|
local q
|
||||||
local i
|
local i
|
||||||
local output_fd
|
local output_fd
|
||||||
|
|
||||||
|
; allocate the per-nesting-depth statement buffers: statement_datas[i] is the start of the buffer for depth i, statement_datas_ends[i] is the current write position in that buffer.
|
||||||
|
statement_datas = malloc(4000)
|
||||||
|
statement_datas_ends = malloc(4000)
|
||||||
|
p = statement_datas
|
||||||
|
q = statement_datas_ends
|
||||||
|
i = 0
|
||||||
|
:statement_datas_loop
|
||||||
|
*8p = malloc(4000000) ; supports 100,000 statements at each level
|
||||||
|
*8q = *8p ; the write position starts at the beginning of this level's buffer (not at the slot in statement_datas itself)
|
||||||
|
p += 8
|
||||||
|
||||||
|
q += 8
|
||||||
|
i += 1
|
||||||
|
if i < 16 goto statement_datas_loop
|
||||||
|
|
||||||
fill_in_powers_of_10()
|
fill_in_powers_of_10()
|
||||||
|
|
||||||
typedefs = ident_list_create(100000)
|
typedefs = ident_list_create(100000)
|
||||||
enumerators = ident_list_create(4000000)
|
enumerators = ident_list_create(4000000)
|
||||||
structures = ident_list_create(4000000)
|
structures = ident_list_create(4000000)
|
||||||
global_variables = ident_list_create(4000000)
|
global_variables = ident_list_create(400000)
|
||||||
|
function_statements = ident_list_create(400000)
|
||||||
|
|
||||||
|
function_stmt_data = malloc(800000) ; should be at least 40 bytes * max # of functions
|
||||||
|
|
||||||
dat_banned_objmacros = 255
|
dat_banned_objmacros = 255
|
||||||
dat_banned_fmacros = 255
|
dat_banned_fmacros = 255
|
||||||
|
@ -197,14 +224,15 @@ function main
|
||||||
translation_phase_4(input_filename, pptokens, processed_pptokens)
|
translation_phase_4(input_filename, pptokens, processed_pptokens)
|
||||||
free(pptokens)
|
free(pptokens)
|
||||||
pptokens = processed_pptokens
|
pptokens = processed_pptokens
|
||||||
print_pptokens(pptokens)
|
;print_pptokens(pptokens)
|
||||||
print_separator()
|
;print_separator()
|
||||||
;print_object_macros()
|
;print_object_macros()
|
||||||
;print_function_macros()
|
;print_function_macros()
|
||||||
|
|
||||||
tokens = malloc(16000000)
|
tokens = malloc(16000000)
|
||||||
p = tokenize(pptokens, tokens, input_filename, 1)
|
p = tokenize(pptokens, tokens, input_filename, 1)
|
||||||
print_tokens(tokens, p)
|
print_tokens(tokens, p)
|
||||||
|
print_separator()
|
||||||
; NOTE: do NOT free pptokens; identifiers still reference them.
|
; NOTE: do NOT free pptokens; identifiers still reference them.
|
||||||
|
|
||||||
parse_tokens(tokens)
|
parse_tokens(tokens)
|
||||||
|
|
115
05/main.c
115
05/main.c
|
@ -1,59 +1,64 @@
|
||||||
typedef struct {
|
int f(void) {
|
||||||
int i[41];
|
blah:blah:blah:;
|
||||||
long double d;
|
}
|
||||||
} (*x___)(void);
|
|
||||||
|
|
||||||
typedef enum X {
|
|
||||||
R,S,T
|
|
||||||
} *Foo[sizeof(unsigned long)];
|
|
||||||
typedef int A___[T];
|
|
||||||
|
|
||||||
typedef struct A {
|
/* typedef struct { */
|
||||||
int x, y;
|
/* int i[41]; */
|
||||||
long double c;
|
/* long double d; */
|
||||||
unsigned long d;
|
/* } (*x___)(void); */
|
||||||
char e[3];
|
/* */
|
||||||
long f;
|
/* typedef enum X { */
|
||||||
} A;
|
/* R,S,T */
|
||||||
|
/* } *Foo[sizeof(unsigned long)]; */
|
||||||
typedef union B{
|
/* typedef int A___[T]; */
|
||||||
int x;
|
/* */
|
||||||
struct {
|
/* typedef struct A { */
|
||||||
int y;
|
/* int x, y; */
|
||||||
struct {long z; } c;
|
/* long double c; */
|
||||||
} c;
|
/* unsigned long d; */
|
||||||
}B;
|
/* char e[3]; */
|
||||||
|
/* long f; */
|
||||||
typedef int QQQ[sizeof(A)+sizeof"hello"];
|
/* } A; */
|
||||||
typedef int RRR[sizeof(struct B)];
|
/* */
|
||||||
|
/* typedef union B{ */
|
||||||
static unsigned int x={55};
|
/* int x; */
|
||||||
static char *s = "hello";
|
/* struct { */
|
||||||
static char *t = "goodbye";
|
/* int y; */
|
||||||
static char u[8] = "hellothe";
|
/* struct {long z; } c; */
|
||||||
static char v[100] = "re my";
|
/* } c; */
|
||||||
static char w[] = "friendly";
|
/* }B; */
|
||||||
static char x_[] = "hi";
|
/* */
|
||||||
typedef int A_[sizeof x_ + sizeof u];
|
/* typedef int QQQ[sizeof(A)+sizeof"hello"]; */
|
||||||
|
/* typedef int RRR[sizeof(struct B)]; */
|
||||||
static int a[5] = {1,2,3};
|
/* */
|
||||||
static char b[6][7] = {{'a'},{'b'},{'c'},{'d'},{'e'}};
|
/* static unsigned int x={55}; */
|
||||||
static char __b[][7] = {{'a'},"hello",'r'};
|
/* static char *s = "hello"; */
|
||||||
static int _u = sizeof __b;
|
/* static char *t = "goodbye"; */
|
||||||
|
/* static char u[8] = "hellothe"; */
|
||||||
struct {
|
/* static char v[100] = "re my"; */
|
||||||
int a;
|
/* static char w[] = "friendly"; */
|
||||||
long b;
|
/* static char x_[] = "hi"; */
|
||||||
} x1[] = {0x1234567890, 1ul<<60|1ul<<3, 77};
|
/* typedef int A_[sizeof x_ + sizeof u]; */
|
||||||
int y1 = 0x12345678;
|
/* */
|
||||||
|
/* static int a[5] = {1,2,3}; */
|
||||||
struct {
|
/* static char b[6][7] = {{'a'},{'b'},{'c'},{'d'},{'e'}}; */
|
||||||
int x[2], y;
|
/* static char __b[][7] = {{'a'},"hello",'r'}; */
|
||||||
} test[] = {3, 5,0x1234,0x4321};
|
/* static int _u = sizeof __b; */
|
||||||
typedef int Blah[sizeof((B *)0)->c.y];
|
/* */
|
||||||
unsigned marker = 0xdeadbeef;
|
/* struct { */
|
||||||
|
/* int a; */
|
||||||
typedef int (*FUNCTION)(void);
|
/* long b; */
|
||||||
typedef int AAAA[sizeof*****((FUNCTION)0)];
|
/* } x1[] = {0x1234567890, 1ul<<60|1ul<<3, 77}; */
|
||||||
|
/* int y1 = 0x12345678; */
|
||||||
|
/* */
|
||||||
|
/* struct { */
|
||||||
|
/* int x[2], y; */
|
||||||
|
/* } test[] = {3, 5,0x1234,0x4321}; */
|
||||||
|
/* typedef int Blah[sizeof((B *)0)->c.y]; */
|
||||||
|
/* unsigned marker = 0xdeadbeef; */
|
||||||
|
/* */
|
||||||
|
/* typedef int (*FUNCTION)(void); */
|
||||||
|
/* typedef int AAAA[sizeof*****((FUNCTION)0)]; */
|
||||||
|
|
||||||
/* typedef int X[sizeof(int)+4]; */
|
/* typedef int X[sizeof(int)+4]; */
|
||||||
|
|
130
05/parse.b
130
05/parse.b
|
@ -35,7 +35,7 @@ function structure_is_union
|
||||||
if offset == 0 goto return_1 ; if that's 0, it's a union or 1-element struct
|
if offset == 0 goto return_1 ; if that's 0, it's a union or 1-element struct
|
||||||
goto return_0
|
goto return_0
|
||||||
|
|
||||||
|
; parse a translation unit
|
||||||
function parse_tokens
|
function parse_tokens
|
||||||
argument tokens
|
argument tokens
|
||||||
local token
|
local token
|
||||||
|
@ -44,6 +44,7 @@ function parse_tokens
|
||||||
local p
|
local p
|
||||||
local b
|
local b
|
||||||
local c
|
local c
|
||||||
|
local n
|
||||||
local base_type
|
local base_type
|
||||||
local base_type_end
|
local base_type_end
|
||||||
local name
|
local name
|
||||||
|
@ -52,6 +53,7 @@ function parse_tokens
|
||||||
local suffix
|
local suffix
|
||||||
local suffix_end
|
local suffix_end
|
||||||
local is_extern
|
local is_extern
|
||||||
|
local out
|
||||||
|
|
||||||
token = tokens
|
token = tokens
|
||||||
:parse_tokens_loop
|
:parse_tokens_loop
|
||||||
|
@ -64,7 +66,7 @@ function parse_tokens
|
||||||
b = token_is_type(token)
|
b = token_is_type(token)
|
||||||
if b != 0 goto parse_toplevel_decl
|
if b != 0 goto parse_toplevel_decl
|
||||||
|
|
||||||
die(.str_bad_statement)
|
token_error(token, .str_bad_statement)
|
||||||
:str_bad_statement
|
:str_bad_statement
|
||||||
string Bad statement.
|
string Bad statement.
|
||||||
byte 0
|
byte 0
|
||||||
|
@ -164,12 +166,26 @@ function parse_tokens
|
||||||
byte 0
|
byte 0
|
||||||
:parse_function_definition
|
:parse_function_definition
|
||||||
p = types + type
|
p = types + type
|
||||||
; @NOTE: remember to turn array members into pointers
|
; @TODO: parameters
|
||||||
|
; @NOTE: remember to turn array members into pointers
|
||||||
if *1p != TYPE_FUNCTION goto lbrace_after_declaration
|
if *1p != TYPE_FUNCTION goto lbrace_after_declaration
|
||||||
die(.str_fdNI) ; @TODO
|
|
||||||
:str_fdNI
|
global function_stmt_data ; initialized in main
|
||||||
string function definitions not implemented.
|
global function_stmt_data_bytes_used
|
||||||
byte 10
|
|
||||||
|
n = function_stmt_data_bytes_used
|
||||||
|
out = function_stmt_data + function_stmt_data_bytes_used
|
||||||
|
parse_statement(&token, &out)
|
||||||
|
if parse_stmt_depth != 0 goto stmtdepth_internal_err
|
||||||
|
function_stmt_data_bytes_used = out - function_stmt_data
|
||||||
|
|
||||||
|
ident_list_add(function_statements, name, n)
|
||||||
|
goto parse_tokens_loop
|
||||||
|
|
||||||
|
:stmtdepth_internal_err
|
||||||
|
token_error(token, .str_stmtdepth_internal_err)
|
||||||
|
:str_stmtdepth_internal_err
|
||||||
|
string Internal compiler error: parse_stmt_depth is not 0 after parsing function body.
|
||||||
byte 0
|
byte 0
|
||||||
:lbrace_after_declaration
|
:lbrace_after_declaration
|
||||||
token_error(token, .str_lbrace_after_declaration)
|
token_error(token, .str_lbrace_after_declaration)
|
||||||
|
@ -240,6 +256,106 @@ function parse_tokens
|
||||||
:parse_tokens_eof
|
:parse_tokens_eof
|
||||||
return
|
return
|
||||||
|
|
||||||
|
; write type, file, and line info for statement:
|
||||||
|
; stores type in the byte at out, then copies 2 bytes (file) from token+2 to out+2 and 4 bytes (line) from token+4 to out+4. the padding byte at out+1 is not written.
|
||||||
|
; out, type, and token are arguments (callers pass all three), so they must not be declared as locals.
|
||||||
|
function write_statement_header
|
||||||
|
argument out
|
||||||
|
argument type
|
||||||
|
argument token
|
||||||
|
*1out = type
|
||||||
|
out += 2 ; skip the type and padding bytes
|
||||||
|
token += 2
|
||||||
|
*2out = *2token ; file
|
||||||
|
out += 2
|
||||||
|
token += 2
|
||||||
|
*4out = *4token ; line
|
||||||
|
return 0
|
||||||
|
|
||||||
|
; writes statement data for the statement at *p_token to (*)*p_out
|
||||||
|
; advances *p_out by 40 bytes for each statement written: every leading label is written as its own statement, a block writes one statement, and an empty statement (a lone semicolon) writes nothing.
|
||||||
|
; on return, *p_token has been advanced past the tokens that were consumed. the body of a block is not written to *p_out but to the statement data for the current nesting depth.
|
||||||
|
function parse_statement
|
||||||
|
argument p_token
|
||||||
|
argument p_out
|
||||||
|
local out
|
||||||
|
local token
|
||||||
|
local p
|
||||||
|
local c
|
||||||
|
local n
|
||||||
|
|
||||||
|
|
||||||
|
out = *8p_out ; work on local copies; they are stored back at parse_statement_ret
|
||||||
|
token = *8p_token
|
||||||
|
|
||||||
|
:stmt_label_loop
|
||||||
|
; if second token in statement is a colon, this must be a label
|
||||||
|
p = token + 16 ; the following token (this function steps through tokens 16 bytes at a time)
|
||||||
|
if *1p == SYMBOL_COLON goto stmt_label
|
||||||
|
goto stmt_label_loop_end
|
||||||
|
|
||||||
|
:stmt_label
|
||||||
|
write_statement_header(out, STATEMENT_LABEL, token)
|
||||||
|
out += 8 ; past the 8-byte header, to data1
|
||||||
|
token += 8
|
||||||
|
*8out = *8token ; copy label name
|
||||||
|
out += 32 ; past data1..data4, so 40 bytes in total for this statement
|
||||||
|
token += 24 ; skip ident name, and colon
|
||||||
|
goto stmt_label_loop
|
||||||
|
:stmt_label_loop_end
|
||||||
|
|
||||||
|
c = *1token
|
||||||
|
if c == SYMBOL_SEMICOLON goto stmt_empty
|
||||||
|
if c == SYMBOL_LBRACE goto stmt_block
|
||||||
|
|
||||||
|
token_error(token, .str_unrecognized_statement)
|
||||||
|
:str_unrecognized_statement
|
||||||
|
string Unrecognized statement.
|
||||||
|
byte 0
|
||||||
|
:parse_statement_ret
|
||||||
|
*8p_token = token
|
||||||
|
*8p_out = out
|
||||||
|
return
|
||||||
|
:stmt_block
|
||||||
|
local block_p_out
|
||||||
|
; find the appropriate statement data to use for this block's body
|
||||||
|
block_p_out = statement_datas_ends
|
||||||
|
block_p_out += parse_stmt_depth < 3 ; presumably `<` is a left shift here (8 bytes per depth level) -- TODO confirm against the language reference
|
||||||
|
|
||||||
|
write_statement_header(out, STATEMENT_BLOCK, token)
|
||||||
|
out += 8
|
||||||
|
*8out = *8block_p_out ; data1 = where this block's body statements will begin
|
||||||
|
out += 32
|
||||||
|
|
||||||
|
parse_stmt_depth += 1
|
||||||
|
if parse_stmt_depth >= 16 goto too_much_nesting
|
||||||
|
|
||||||
|
token += 16 ; skip opening {
|
||||||
|
:parse_block_loop
|
||||||
|
if *1token == TOKEN_EOF goto parse_block_eof
|
||||||
|
if *1token == SYMBOL_RBRACE goto parse_block_loop_end
|
||||||
|
parse_statement(&token, block_p_out) ; each body statement is appended at *block_p_out, which the call advances
|
||||||
|
goto parse_block_loop
|
||||||
|
:parse_block_loop_end
|
||||||
|
token += 16 ; skip closing }
|
||||||
|
p = *8block_p_out
|
||||||
|
*1p = 0 ; probably redundant, but whatever
|
||||||
|
*8block_p_out += 8 ; add 8 and not 1 because of alignment
|
||||||
|
parse_stmt_depth -= 1
|
||||||
|
goto parse_statement_ret
|
||||||
|
|
||||||
|
:parse_block_eof
|
||||||
|
token_error(*8p_token, .str_parse_block_eof) ; *p_token has not been updated yet, so this is the token the statement started at
|
||||||
|
:str_parse_block_eof
|
||||||
|
string End of file reached while trying to parse block. Are you missing a closing brace?
|
||||||
|
byte 0
|
||||||
|
:too_much_nesting
|
||||||
|
token_error(token, .str_too_much_nesting)
|
||||||
|
:str_too_much_nesting
|
||||||
|
string Too many levels of nesting blocks.
|
||||||
|
byte 0
|
||||||
|
:stmt_empty
|
||||||
|
; empty statement, e.g. while(something)-> ; <-
|
||||||
|
token += 16 ; skip semicolon
|
||||||
|
goto parse_statement_ret
|
||||||
|
|
||||||
; parse a global variable's initializer
|
; parse a global variable's initializer
|
||||||
; e.g. int x[5] = {1+8, 2, 3, 4, 5};
|
; e.g. int x[5] = {1+8, 2, 3, 4, 5};
|
||||||
; advances *p_token to the token right after the initializer
|
; advances *p_token to the token right after the initializer
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue