start parsing statements (not a lot yet)
This commit is contained in:
parent
fd02968c23
commit
d1167f03d0
4 changed files with 260 additions and 65 deletions
|
@ -230,6 +230,52 @@
|
|||
; types will be initialized (in main) so that this refers to the type char*
|
||||
#define TYPE_POINTER_TO_CHAR 20
|
||||
|
||||
; STATEMENTS
|
||||
; In C, note that `if', `while', etc. always have a single statement as their body:
|
||||
; if (x) { y; z; w; }
|
||||
; here {y; z; w;} is a single `compound' statement containing three statements.
|
||||
; our statements don't directly correspond to the C89 standard's notion of statements, in particular,
|
||||
; labels count as separate statements and declarations count as statements.
|
||||
; each statement is stored as exactly 40 bytes
|
||||
; uchar type
|
||||
; uchar padding
|
||||
; ushort file
|
||||
; uint line
|
||||
; ulong data1
|
||||
; ulong data2
|
||||
; ulong data3
|
||||
; ulong data4
|
||||
; a type of 0 indicates the end of the block.
|
||||
; data layout for particular statements:
|
||||
; - STATEMENT_EXPRESSION - data1 is a pointer to expression data; data2,3,4 are unused
|
||||
; - STATEMENT_LOCAL_DECLARATION - declaring a local variable (automatic/"register" storage duration), data1 = total bytes used by all local variables so far in this function including this one; data2,3,4 unused
|
||||
; - STATEMENT_LABEL - data1 is a pointer to the name of the label; data2,3,4 are unused
|
||||
; - STATEMENT_BLOCK - data1 is a pointer to an array of statements; data2,3,4 are unused
|
||||
; - STATEMENT_IF - data1 is a pointer to the condition, data2 is a pointer to the `if' branch statement, data3 is a pointer to the `else' branch statement, or 0 if there is none; data4 is unused
|
||||
; - STATEMENT_SWITCH - data1 is a pointer to the expression, data2 is a pointer to the body statement; data3,4 are unused
|
||||
; - STATEMENT_WHILE - data1 is a pointer to the condition, data2 is a pointer to the body statement; data3,4 are unused
|
||||
; - STATEMENT_DO - data1 is a pointer to the body statement, data2 is a pointer to the condition; data3,4 are unused
|
||||
; - STATEMENT_FOR - data1,2,3 are pointers to the first, second, and third expressions inside parentheses, data4 is a pointer to the body statement
|
||||
; - STATEMENT_GOTO - data1 is a pointer to the name of the label; data2,3,4 are unused
|
||||
; - STATEMENT_CONTINUE - data1,2,3,4 are unused
|
||||
; - STATEMENT_BREAK - data1,2,3,4 are unused
|
||||
; - STATEMENT_RETURN - data1 is a pointer to the expression, or 0 if there is none; data2,3,4 are unused
|
||||
#define STATEMENT_EXPRESSION 1
|
||||
#define STATEMENT_LOCAL_DECLARATION 2
|
||||
#define STATEMENT_LABEL 3
|
||||
#define STATEMENT_BLOCK 4
|
||||
#define STATEMENT_IF 5
|
||||
#define STATEMENT_SWITCH 6
|
||||
#define STATEMENT_WHILE 7
|
||||
#define STATEMENT_DO 8
|
||||
#define STATEMENT_FOR 9
|
||||
#define STATEMENT_GOTO 0xa
|
||||
#define STATEMENT_CONTINUE 0xb
|
||||
#define STATEMENT_BREAK 0xc
|
||||
#define STATEMENT_RETURN 0xd
|
||||
|
||||
|
||||
|
||||
:keyword_table
|
||||
byte SYMBOL_SEMICOLON
|
||||
byte 59
|
||||
|
|
34
05/main.b
34
05/main.b
|
@ -42,6 +42,16 @@ global output_file_data
|
|||
; ident list of global variables. each one is stored as
|
||||
; (type << 32) | address
|
||||
global global_variables
|
||||
; ident list of functions. each entry is a pointer to a single statement - which should always be a STATEMENT_BLOCK
|
||||
global function_statements
|
||||
; statement_datas[0] = pointer to statement data for block-nesting depth 0 (i.e. function bodies)
|
||||
; statement_datas[1] = pointer to statement data for block-nesting depth 1 (blocks inside functions)
|
||||
; statement_datas[2] = pointer to statement data for block-nesting depth 2 (blocks inside blocks inside functions)
|
||||
; etc. up to statement_datas[15] "* 15 nesting levels of compound statements, iteration control structures, and selection control structures" C89 § 2.2.4.1
|
||||
; these have to be separated for reasons™
|
||||
global statement_datas
|
||||
global statement_datas_ends
|
||||
global parse_stmt_depth
|
||||
|
||||
#include util.b
|
||||
#include idents.b
|
||||
|
@ -154,15 +164,32 @@ function main
|
|||
local tokens
|
||||
local ast
|
||||
local p
|
||||
local q
|
||||
local i
|
||||
local output_fd
|
||||
|
||||
statement_datas = malloc(4000)
|
||||
statement_datas_ends = malloc(4000)
|
||||
p = statement_datas
|
||||
q = statement_datas_ends
|
||||
i = 0
|
||||
:statement_datas_loop
	; allocate the statement buffer for nesting depth i
	*8p = malloc(4000000) ; supports 100,000 statements at each level
	; the "end" cursor for this depth starts at the beginning of the buffer just allocated.
	; (it must hold the buffer address itself, not the address of the statement_datas slot:
	;  parse_statement reads statement_datas_ends[depth] as the place to write the next statement.)
	*8q = *8p
	p += 8
	q += 8
	i += 1
	if i < 16 goto statement_datas_loop
|
||||
|
||||
fill_in_powers_of_10()
|
||||
|
||||
typedefs = ident_list_create(100000)
|
||||
enumerators = ident_list_create(4000000)
|
||||
structures = ident_list_create(4000000)
|
||||
global_variables = ident_list_create(4000000)
|
||||
global_variables = ident_list_create(400000)
|
||||
function_statements = ident_list_create(400000)
|
||||
|
||||
function_stmt_data = malloc(800000) ; should be at least 40 bytes * max # of functions
|
||||
|
||||
dat_banned_objmacros = 255
|
||||
dat_banned_fmacros = 255
|
||||
|
@ -197,14 +224,15 @@ function main
|
|||
translation_phase_4(input_filename, pptokens, processed_pptokens)
|
||||
free(pptokens)
|
||||
pptokens = processed_pptokens
|
||||
print_pptokens(pptokens)
|
||||
print_separator()
|
||||
;print_pptokens(pptokens)
|
||||
;print_separator()
|
||||
;print_object_macros()
|
||||
;print_function_macros()
|
||||
|
||||
tokens = malloc(16000000)
|
||||
p = tokenize(pptokens, tokens, input_filename, 1)
|
||||
print_tokens(tokens, p)
|
||||
print_separator()
|
||||
; NOTE: do NOT free pptokens; identifiers still reference them.
|
||||
|
||||
parse_tokens(tokens)
|
||||
|
|
115
05/main.c
115
05/main.c
|
@ -1,59 +1,64 @@
|
|||
typedef struct {
|
||||
int i[41];
|
||||
long double d;
|
||||
} (*x___)(void);
|
||||
/* parser test input: three consecutive labels followed by an empty statement. */
/* NOTE(review): the same label name is repeated three times -- confirm this is intentional. */
int f(void) {
|
||||
blah:blah:blah:;
|
||||
}
|
||||
|
||||
typedef enum X {
|
||||
R,S,T
|
||||
} *Foo[sizeof(unsigned long)];
|
||||
typedef int A___[T];
|
||||
|
||||
typedef struct A {
|
||||
int x, y;
|
||||
long double c;
|
||||
unsigned long d;
|
||||
char e[3];
|
||||
long f;
|
||||
} A;
|
||||
|
||||
typedef union B{
|
||||
int x;
|
||||
struct {
|
||||
int y;
|
||||
struct {long z; } c;
|
||||
} c;
|
||||
}B;
|
||||
|
||||
typedef int QQQ[sizeof(A)+sizeof"hello"];
|
||||
typedef int RRR[sizeof(struct B)];
|
||||
|
||||
static unsigned int x={55};
|
||||
static char *s = "hello";
|
||||
static char *t = "goodbye";
|
||||
static char u[8] = "hellothe";
|
||||
static char v[100] = "re my";
|
||||
static char w[] = "friendly";
|
||||
static char x_[] = "hi";
|
||||
typedef int A_[sizeof x_ + sizeof u];
|
||||
|
||||
static int a[5] = {1,2,3};
|
||||
static char b[6][7] = {{'a'},{'b'},{'c'},{'d'},{'e'}};
|
||||
static char __b[][7] = {{'a'},"hello",'r'};
|
||||
static int _u = sizeof __b;
|
||||
|
||||
struct {
|
||||
int a;
|
||||
long b;
|
||||
} x1[] = {0x1234567890, 1ul<<60|1ul<<3, 77};
|
||||
int y1 = 0x12345678;
|
||||
|
||||
struct {
|
||||
int x[2], y;
|
||||
} test[] = {3, 5,0x1234,0x4321};
|
||||
typedef int Blah[sizeof((B *)0)->c.y];
|
||||
unsigned marker = 0xdeadbeef;
|
||||
|
||||
typedef int (*FUNCTION)(void);
|
||||
typedef int AAAA[sizeof*****((FUNCTION)0)];
|
||||
/* typedef struct { */
|
||||
/* int i[41]; */
|
||||
/* long double d; */
|
||||
/* } (*x___)(void); */
|
||||
/* */
|
||||
/* typedef enum X { */
|
||||
/* R,S,T */
|
||||
/* } *Foo[sizeof(unsigned long)]; */
|
||||
/* typedef int A___[T]; */
|
||||
/* */
|
||||
/* typedef struct A { */
|
||||
/* int x, y; */
|
||||
/* long double c; */
|
||||
/* unsigned long d; */
|
||||
/* char e[3]; */
|
||||
/* long f; */
|
||||
/* } A; */
|
||||
/* */
|
||||
/* typedef union B{ */
|
||||
/* int x; */
|
||||
/* struct { */
|
||||
/* int y; */
|
||||
/* struct {long z; } c; */
|
||||
/* } c; */
|
||||
/* }B; */
|
||||
/* */
|
||||
/* typedef int QQQ[sizeof(A)+sizeof"hello"]; */
|
||||
/* typedef int RRR[sizeof(struct B)]; */
|
||||
/* */
|
||||
/* static unsigned int x={55}; */
|
||||
/* static char *s = "hello"; */
|
||||
/* static char *t = "goodbye"; */
|
||||
/* static char u[8] = "hellothe"; */
|
||||
/* static char v[100] = "re my"; */
|
||||
/* static char w[] = "friendly"; */
|
||||
/* static char x_[] = "hi"; */
|
||||
/* typedef int A_[sizeof x_ + sizeof u]; */
|
||||
/* */
|
||||
/* static int a[5] = {1,2,3}; */
|
||||
/* static char b[6][7] = {{'a'},{'b'},{'c'},{'d'},{'e'}}; */
|
||||
/* static char __b[][7] = {{'a'},"hello",'r'}; */
|
||||
/* static int _u = sizeof __b; */
|
||||
/* */
|
||||
/* struct { */
|
||||
/* int a; */
|
||||
/* long b; */
|
||||
/* } x1[] = {0x1234567890, 1ul<<60|1ul<<3, 77}; */
|
||||
/* int y1 = 0x12345678; */
|
||||
/* */
|
||||
/* struct { */
|
||||
/* int x[2], y; */
|
||||
/* } test[] = {3, 5,0x1234,0x4321}; */
|
||||
/* typedef int Blah[sizeof((B *)0)->c.y]; */
|
||||
/* unsigned marker = 0xdeadbeef; */
|
||||
/* */
|
||||
/* typedef int (*FUNCTION)(void); */
|
||||
/* typedef int AAAA[sizeof*****((FUNCTION)0)]; */
|
||||
|
||||
/* typedef int X[sizeof(int)+4]; */
|
||||
|
|
130
05/parse.b
130
05/parse.b
|
@ -35,7 +35,7 @@ function structure_is_union
|
|||
if offset == 0 goto return_1 ; if that's 0, it's a union or 1-element struct
|
||||
goto return_0
|
||||
|
||||
|
||||
; parse a translation unit
|
||||
function parse_tokens
|
||||
argument tokens
|
||||
local token
|
||||
|
@ -44,6 +44,7 @@ function parse_tokens
|
|||
local p
|
||||
local b
|
||||
local c
|
||||
local n
|
||||
local base_type
|
||||
local base_type_end
|
||||
local name
|
||||
|
@ -52,6 +53,7 @@ function parse_tokens
|
|||
local suffix
|
||||
local suffix_end
|
||||
local is_extern
|
||||
local out
|
||||
|
||||
token = tokens
|
||||
:parse_tokens_loop
|
||||
|
@ -64,7 +66,7 @@ function parse_tokens
|
|||
b = token_is_type(token)
|
||||
if b != 0 goto parse_toplevel_decl
|
||||
|
||||
die(.str_bad_statement)
|
||||
token_error(token, .str_bad_statement)
|
||||
:str_bad_statement
|
||||
string Bad statement.
|
||||
byte 0
|
||||
|
@ -164,12 +166,26 @@ function parse_tokens
|
|||
byte 0
|
||||
:parse_function_definition
|
||||
p = types + type
|
||||
; @NOTE: remember to turn array members into pointers
|
||||
; @TODO: parameters
|
||||
; @NOTE: remember to turn array members into pointers
|
||||
if *1p != TYPE_FUNCTION goto lbrace_after_declaration
|
||||
die(.str_fdNI) ; @TODO
|
||||
:str_fdNI
|
||||
string function definitions not implemented.
|
||||
byte 10
|
||||
|
||||
global function_stmt_data ; initialized in main
|
||||
global function_stmt_data_bytes_used
|
||||
|
||||
n = function_stmt_data_bytes_used
|
||||
out = function_stmt_data + function_stmt_data_bytes_used
|
||||
parse_statement(&token, &out)
|
||||
if parse_stmt_depth != 0 goto stmtdepth_internal_err
|
||||
function_stmt_data_bytes_used = out - function_stmt_data
|
||||
|
||||
ident_list_add(function_statements, name, n)
|
||||
goto parse_tokens_loop
|
||||
|
||||
:stmtdepth_internal_err
|
||||
token_error(token, .str_stmtdepth_internal_err)
|
||||
:str_stmtdepth_internal_err
|
||||
string Internal compiler error: parse_stmt_depth is not 0 after parsing function body.
|
||||
byte 0
|
||||
:lbrace_after_declaration
|
||||
token_error(token, .str_lbrace_after_declaration)
|
||||
|
@ -240,6 +256,106 @@ function parse_tokens
|
|||
:parse_tokens_eof
|
||||
return
|
||||
|
||||
; write type, file, and line info for statement
; arguments:
;   out   - pointer to the start of the statement being written
;   type  - statement type; only its low byte is stored
;   token - pointer to the token the statement starts at
; bytes written: out[0] = type, out[2..3] = file, out[4..7] = line.
; file and line are copied from the same byte offsets of the token.
; out[1] (padding) is not written.
; the caller's pointers are not advanced (callers add 8 to out themselves). always returns 0.
function write_statement_header
	argument out
	argument type
	argument token
	*1out = type
	; step past the type and padding bytes in both the statement and the token
	out += 2
	token += 2
	*2out = *2token ; file
	out += 2
	token += 2
	*4out = *4token ; line
	return 0
|
||||
|
||||
; writes statement data for the statement at *p_token to (*)*p_out
; on return, *p_token points just past the statement that was parsed, and
; *p_out points just past the statement data that was written.
; every statement written is 40 bytes, but the number of statements written to *p_out varies:
;   - each leading label (`name:') is written as a separate STATEMENT_LABEL
;   - an empty statement (`;') writes nothing
;   - a block (`{ ... }') writes a single STATEMENT_BLOCK; the statements inside it are written to the
;     statement data for the current nesting depth (statement_datas_ends[parse_stmt_depth]),
;     followed by an 8-byte terminator whose type byte is 0.
; anything else is currently reported as an error with token_error.
; NOTE(review): a label is recognized purely by the second token being a colon; the first token
; is not checked to be an identifier.
function parse_statement
	argument p_token
	argument p_out
	local out
	local token
	local p
	local c
	local block_p_out
	
	out = *8p_out
	token = *8p_token
	
	:stmt_label_loop
		; if second token in statement is a colon, this must be a label
		p = token + 16
		if *1p != SYMBOL_COLON goto stmt_label_loop_end
		write_statement_header(out, STATEMENT_LABEL, token)
		out += 8
		token += 8
		*8out = *8token ; copy label name
		out += 32 ; data2,3,4 are unused
		token += 24 ; skip ident name, and colon
		goto stmt_label_loop
	:stmt_label_loop_end
	
	c = *1token
	if c == SYMBOL_SEMICOLON goto stmt_empty
	if c == SYMBOL_LBRACE goto stmt_block
	
	; presumably token_error does not return (string data follows the call)
	token_error(token, .str_unrecognized_statement)
	:str_unrecognized_statement
		string Unrecognized statement.
		byte 0
	:parse_statement_ret
		; report back how far we got in the tokens and in the output
		*8p_token = token
		*8p_out = out
		return
	:stmt_block
		; find the appropriate statement data to use for this block's body
		; (block_p_out = &statement_datas_ends[parse_stmt_depth]; `< 3' is a left shift, i.e. times 8)
		block_p_out = statement_datas_ends
		block_p_out += parse_stmt_depth < 3
		
		write_statement_header(out, STATEMENT_BLOCK, token)
		out += 8
		*8out = *8block_p_out ; data1 = where this block's statements will start
		out += 32
		
		parse_stmt_depth += 1
		if parse_stmt_depth >= 16 goto too_much_nesting
		
		token += 16 ; skip opening {
		:parse_block_loop
			if *1token == TOKEN_EOF goto parse_block_eof
			if *1token == SYMBOL_RBRACE goto parse_block_loop_end
			; the recursive call appends to this depth's statement data and advances its end pointer
			parse_statement(&token, block_p_out)
			goto parse_block_loop
		:parse_block_loop_end
		token += 16 ; skip closing }
		; terminate the block's statement array with a type of 0
		p = *8block_p_out
		*1p = 0 ; probably redundant, but whatever
		*8block_p_out += 8 ; add 8 and not 1 because of alignment
		parse_stmt_depth -= 1
		goto parse_statement_ret
		
		:parse_block_eof
			; report the error at the token the statement started at
			token_error(*8p_token, .str_parse_block_eof)
		:str_parse_block_eof
			string End of file reached while trying to parse block. Are you missing a closing brace?
			byte 0
		:too_much_nesting
			token_error(token, .str_too_much_nesting)
		:str_too_much_nesting
			string Too many levels of nesting blocks.
			byte 0
	:stmt_empty
		; empty statement, e.g. while(something)-> ; <-
		; nothing is written to out for this
		token += 16 ; skip semicolon
		goto parse_statement_ret
|
||||
|
||||
; parse a global variable's initializer
|
||||
; e.g. int x[5] = {1+8, 2, 3, 4, 5};
|
||||
; advances *p_token to the token right after the initializer
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue