start codegen

This commit is contained in:
pommicket 2022-02-09 22:44:27 -05:00
parent d74dc53b0b
commit 3d44eba388
5 changed files with 210 additions and 4 deletions

195
05/codegen.b Normal file
View file

@ -0,0 +1,195 @@
; CALLING CONVENTION:
; arguments are pushed onto the stack by the caller, from right to left
; caller must also reserve space on stack for return value
; so the function puts the return value at [rbp+16] (+8 for the saved rbp stored by the prologue, +8 for the stored return address)
; Code generator state.
; code_output is the write cursor for generated machine code:
; the emit_* functions store at this address and then advance it.
global code_output
global codegen_second_pass ; = 0 on first global pass, 1 on second global pass
global functions_addresses ; ident list of addresses
global functions_labels ; ident list of ident lists of label addresses
global curr_function_labels ; ident list of labels for current function (written to in 1st pass, read from in 2nd pass)
; x86-64 register numbers, as placed in the ModRM byte by emit_mov_reg.
; Note that the numbering is not alphabetical: rbx is 3, rcx is 1, rdx is 2.
#define REG_RAX 0
#define REG_RBX 3
#define REG_RCX 1
#define REG_RDX 2
#define REG_RSP 4
#define REG_RBP 5
#define REG_RSI 6
#define REG_RDI 7
; Append a single byte to the generated code and advance code_output by 1.
;   byte - value to write (one byte is stored)
function emit_byte
	argument byte
	local out
	out = code_output
	*1out = byte
	code_output = out + 1
	return
; Append a run of bytes to the generated code and advance code_output past them.
;   bytes - address of the bytes to copy
;   count - number of bytes to copy
function emit_bytes
	argument bytes
	argument count
	local out
	out = code_output
	memcpy(out, bytes, count)
	code_output = out + count
	return
; Append a 2-byte value to the generated code and advance code_output by 2.
;   word - value to write (two bytes are stored)
function emit_word
	argument word
	local out
	out = code_output
	*2out = word
	code_output = out + 2
	return
; Append a 4-byte value to the generated code and advance code_output by 4.
;   word - value to write (despite the name, four bytes are stored)
function emit_dword
	argument word
	local out
	out = code_output
	*4out = word
	code_output = out + 4
	return
; Append an 8-byte value to the generated code and advance code_output by 8.
;   word - value to write (despite the name, eight bytes are stored)
function emit_qword
	argument word
	local out
	out = code_output
	*8out = word
	code_output = out + 8
	return
; Emit a 64-bit register-to-register move.
; e.g. emit_mov_reg(REG_RAX, REG_RBX) emits mov rax, rbx
;   dest - REG_* number of the destination register
;   src  - REG_* number of the source register
function emit_mov_reg
	argument dest
	argument src
	local modrm
	; encoding: 48 89 (DEST|SRC<<3|0xc0)
	; the two fixed bytes 48 89 are written as one little-endian word
	emit_word(0x8948)
	modrm = 0xc0 | dest
	; src < 3 is the SRC<<3 term of the encoding above
	modrm |= src < 3
	emit_byte(modrm)
	return
; Emit sub rsp, IMM32.
;   imm32 - 32-bit immediate to subtract from rsp
function emit_sub_rsp_imm32
	argument imm32
	; encoding: 48 81 ec IMM32
	emit_word(0x8148)
	emit_byte(0xec)
	emit_dword(imm32)
	return
; Emit mov qword [rsp], rbp (store rbp at the top of the stack).
function emit_mov_qword_rsp_rbp
	; encoding: 48 89 2c 24, written as one little-endian dword
	emit_dword(0x242c8948)
	return
; Emit mov rbp, qword [rsp] (load rbp from the top of the stack).
function emit_mov_rbp_qword_rsp
	; encoding: 48 8b 2c 24, written as one little-endian dword
	emit_dword(0x242c8b48)
	return
; Emit add rsp, IMM32.
;   imm32 - 32-bit immediate to add to rsp
function emit_add_rsp_imm32
	argument imm32
	; encoding: 48 81 c4 IMM32
	emit_word(0x8148)
	emit_byte(0xc4)
	emit_dword(imm32)
	return
; Emit ret (single byte c3).
function emit_ret
	emit_byte(0xc3)
	return
; Emit the code that returns from the function currently being generated.
; It undoes the prologue emitted by generate_function, in this order:
;   mov rsp, rbp      - point rsp back at the slot where the prologue stored rbp
;   mov rbp, [rsp]    - reload the rbp value stored there
;   add rsp, 8        - release that 8-byte slot
;   ret
; make sure you put the return value in the proper place before calling this
function generate_return
emit_mov_reg(REG_RSP, REG_RBP)
emit_mov_rbp_qword_rsp()
emit_add_rsp_imm32(8)
emit_ret()
return
; Generate code for one statement.
; Not implemented yet: this is a stub that emits nothing.
;   statement - the statement passed in by generate_function
function generate_statement
argument statement
; @TODO
return
; Generate the machine code for one function: prologue, body, implicit return.
; Also sets curr_function_labels to this function's label list:
; on the first pass the list is created and registered in functions_labels,
; on the second pass the list registered earlier is looked up again.
;   function_name      - name of the function (key into functions_labels)
;   function_statement - the function's body; handed to generate_statement
function generate_function
	argument function_name
	argument function_statement
	if codegen_second_pass != 0 goto genf_second_pass
		curr_function_labels = ident_list_create(4000) ; ~ 200 labels per function should be plenty
		ident_list_add(functions_labels, function_name, curr_function_labels)
		goto genf_cont
	:genf_second_pass
		curr_function_labels = ident_list_lookup(functions_labels, function_name)
	:genf_cont
	; prologue: sub rsp, 8 / mov [rsp], rbp / mov rbp, rsp
	; (stores the caller's rbp on the stack, then makes rbp point at it)
	emit_sub_rsp_imm32(8)
	emit_mov_qword_rsp_rbp()
	emit_mov_reg(REG_RBP, REG_RSP)
	generate_statement(function_statement)
	; implicit return at end of function
	generate_return()
	return
; Generate code for every function in function_statements, in list order.
; First pass: records each function's address (offset of code_output from
; output_file_data) in functions_addresses.
; Second pass: checks that each function lands on the address recorded on the
; first pass; if not, prints an error naming the function and exits with status 1.
function generate_functions
	local addr
	local c
	local p
	local function_name
	local stmt
	; the list is walked entry by entry: name, null terminator, 8-byte value;
	; an entry whose name starts with a 0 byte ends the list
	function_name = function_statements
	:genfunctions_loop
		if *1function_name == 0 goto genfunctions_loop_end
		addr = code_output - output_file_data ; address of this function
		if codegen_second_pass != 0 goto genfs_check_addr
			; first pass; record address of function
			ident_list_add(functions_addresses, function_name, addr)
			goto genfs_cont
		:genfs_check_addr
			c = ident_list_lookup(functions_addresses, function_name)
			if c != addr goto function_addr_mismatch
		:genfs_cont
		p = memchr(function_name, 0)
		p += 1
		; p now points at this entry's 8-byte value slot.
		; pass the value stored there (what ident_list_lookup would return),
		; not the address of the slot itself.
		; NOTE(review): assumes function_statements maps each name to a pointer to
		; the function's body statement - confirm against the parser.
		stmt = *8p
		generate_function(function_name, stmt)
		function_name = p + 8
		goto genfunctions_loop
	:genfunctions_loop_end
	return
	:function_addr_mismatch
		; address of function on 2nd pass doesn't line up with 1st pass
		fputs(2, .str_function_addr_mismatch)
		fputs(2, function_name)
		exit(1)
	:str_function_addr_mismatch
		string Function address on first pass doesn't match 2nd pass:
		byte 32
		byte 0
; Entry point of the code generator.
; Runs generate_functions twice over the same output region, which starts at
; output_file_data + FUNCTIONS_ADDR:
;   pass 1 (codegen_second_pass = 0) records function addresses and creates label lists;
;   pass 2 (codegen_second_pass = 1) regenerates the code, reading what pass 1 recorded.
; code_output is reset before each pass so both passes write to the same place.
function generate_code
	code_output = output_file_data + FUNCTIONS_ADDR
	codegen_second_pass = 0
	generate_functions()
	code_output = output_file_data + FUNCTIONS_ADDR
	codegen_second_pass = 1
	generate_functions()
	; generate code at the entry point of the executable
	; @TODO
	return

View file

@ -1,10 +1,13 @@
; this is the format of the executables we produce: ; this is the format of the executables we produce:
; elf header 4MB addresses 0x000000-0x400000 (no, it won't actually take up that much space) ; elf header 2MB addresses 0x000000-0x200000 (no, it won't actually take up that much space)
; code 4MB addresses 0x400000-0x7fffff ; entry point 2MB addresses 0x200000-0x3fffff this is where we put the code to call main(), etc. (again, it won't actually take up that much space)
; code (functions) 4MB addresses 0x400000-0x7fffff
; read-only data 4MB addresses 0x800000-0xbfffff ; read-only data 4MB addresses 0x800000-0xbfffff
; read-write data 4MB addresses 0xc00000-0xffffff ; read-write data 4MB addresses 0xc00000-0xffffff
; note that file offsets and runtime addresses are the same. ; note that file offsets and runtime addresses are the same.
; you should be able to change these constants without breaking anything: ; you should be able to change these constants without breaking anything:
#define ENTRY_ADDR 0x200000
#define FUNCTIONS_ADDR 0x400000
#define RODATA_ADDR 0x800000 #define RODATA_ADDR 0x800000
#define RWDATA_ADDR 0xc00000 #define RWDATA_ADDR 0xc00000
#define RWDATA_END 0x1000000 #define RWDATA_END 0x1000000

View file

@ -81,6 +81,7 @@ global function_param_has_no_name
#include preprocess.b #include preprocess.b
#include tokenize.b #include tokenize.b
#include parse.b #include parse.b
#include codegen.b
function types_init function types_init
argument _types argument _types
@ -235,6 +236,8 @@ function main
structure_locations = ident_list_create(2000000) structure_locations = ident_list_create(2000000)
global_variables = ident_list_create(400000) global_variables = ident_list_create(400000)
function_statements = ident_list_create(800000) function_statements = ident_list_create(800000)
functions_addresses = ident_list_create(800000)
functions_labels = ident_list_create(800000)
function_types = ident_list_create(800000) function_types = ident_list_create(800000)
function_stmt_data = malloc(800000) ; should be at least 40 bytes * max # of functions function_stmt_data = malloc(800000) ; should be at least 40 bytes * max # of functions
@ -285,6 +288,7 @@ function main
; NOTE: do NOT free pptokens; identifiers still reference them. ; NOTE: do NOT free pptokens; identifiers still reference them.
parse_tokens(tokens) parse_tokens(tokens)
generate_code()
p = output_file_data + RODATA_ADDR p = output_file_data + RODATA_ADDR
munmap(output_file_data, RWDATA_END) munmap(output_file_data, RWDATA_END)

View file

@ -1,5 +1,3 @@
#include "tests/parse_stb_truetype.h"
/* /*
; @NONSTANDARD: ; @NONSTANDARD:
; the following does not work: ; the following does not work:
@ -14,3 +12,6 @@ This needs to be fixed because otherwise you can't do:
struct A { struct B *blah; } struct A { struct B *blah; }
struct B { struct A *blah; } struct B { struct A *blah; }
*/ */
int main(void) {
}

View file

@ -105,6 +105,9 @@ In the table below, `IMM64` means a 64-bit *immediate* (a constant number).
`rdx:rax` refers to the 128-bit number you get by combining `rdx` and `rax`. `rdx:rax` refers to the 128-bit number you get by combining `rdx` and `rax`.
``` ```
ax bx cx dx sp bp si di
0 3 1 2 4 5 6 7
┌──────────────────────┬───────────────────┬────────────────────────────────────────┐ ┌──────────────────────┬───────────────────┬────────────────────────────────────────┐
│ Instruction │ Encoding │ Description │ │ Instruction │ Encoding │ Description │
├──────────────────────┼───────────────────┼────────────────────────────────────────┤ ├──────────────────────┼───────────────────┼────────────────────────────────────────┤