start codegen
This commit is contained in:
parent
d74dc53b0b
commit
3d44eba388
5 changed files with 210 additions and 4 deletions
195
05/codegen.b
Normal file
195
05/codegen.b
Normal file
|
@ -0,0 +1,195 @@
|
|||
; CALLING CONVENTION:
|
||||
; arguments are pushed onto the stack by the caller, from right to left
|
||||
; caller must also reserve space on stack for return value
|
||||
; so the function puts the return value at [rbp+16] (+8 for the saved rbp, +8 for the stored return address)
|
||||
|
||||
|
||||
|
||||
; ---- code generator state ----

; write cursor: the emit_* routines store at this address and then advance it
global code_output
; = 0 on first global pass, 1 on second global pass
global codegen_second_pass
; ident list mapping each function name to its address
global functions_addresses
; ident list mapping each function name to an ident list of its label addresses
global functions_labels
; ident list of labels for the function currently being generated
; (written to in the 1st pass, read from in the 2nd pass)
global curr_function_labels

; ---- register numbers ----
; listed in numeric order; these values are OR'ed into the ModRM byte by emit_mov_reg.
; (comments are kept on their own lines so that nothing follows the value on a #define line)
#define REG_RAX 0
#define REG_RCX 1
#define REG_RDX 2
#define REG_RBX 3
#define REG_RSP 4
#define REG_RBP 5
#define REG_RSI 6
#define REG_RDI 7
|
||||
|
||||
; emit_byte(byte): append a single byte to the generated code.
; stores `byte` with a 1-byte write at code_output, then advances code_output by 1.
function emit_byte
	argument byte
	*1code_output = byte
	code_output = code_output + 1
	return
|
||||
|
||||
; emit_bytes(bytes, count): append a run of bytes to the generated code.
;   bytes - address of the data to append
;   count - number of bytes to append
; code_output is advanced by count afterwards.
function emit_bytes
	argument bytes
	argument count
	; presumably memcpy(dest, src, n) as in C -- defined outside this file
	memcpy(code_output, bytes, count)
	code_output = code_output + count
	return
|
||||
|
||||
; emit_word(word): append a 2-byte value to the generated code.
; stores `word` with a 2-byte write at code_output, then advances code_output by 2.
function emit_word
	argument word
	*2code_output = word
	code_output = code_output + 2
	return
|
||||
|
||||
; emit_dword(word): append a 4-byte value to the generated code.
; despite its name, the argument `word` is written with a 4-byte store;
; code_output is then advanced by 4.
function emit_dword
	argument word
	*4code_output = word
	code_output = code_output + 4
	return
|
||||
|
||||
; emit_qword(word): append an 8-byte value to the generated code.
; despite its name, the argument `word` is written with an 8-byte store;
; code_output is then advanced by 8.
function emit_qword
	argument word
	*8code_output = word
	code_output = code_output + 8
	return
|
||||
|
||||
; emit_mov_reg(dest, src): emit a 64-bit register-to-register move.
;   dest, src - REG_* register numbers
; e.g. emit_mov_reg(REG_RAX, REG_RBX) emits mov rax, rbx
; encoding: 48 89 (DEST|SRC<<3|0xc0)
function emit_mov_reg
	argument dest
	argument src
	local modrm

	; the two fixed bytes 48 89, written as one little-endian word
	emit_word(0x8948)

	; build the final byte piece by piece; `<` is the shift for SRC<<3 in the encoding above
	modrm = src < 3
	modrm |= dest
	modrm |= 0xc0
	emit_byte(modrm)
	return
|
||||
|
||||
|
||||
; emit_sub_rsp_imm32(imm32): emit sub rsp, IMM32
; encoding: 48 81 ec IMM32 (7 bytes in total)
function emit_sub_rsp_imm32
	argument imm32
	; 48 81, written as one little-endian word
	emit_word(0x8148)
	emit_byte(0xec)
	; the 4-byte immediate
	emit_dword(imm32)
	return
|
||||
|
||||
; emit_mov_qword_rsp_rbp(): emit mov qword [rsp], rbp
; encoding: 48 89 2c 24
function emit_mov_qword_rsp_rbp
	; all four bytes go out as one little-endian dword
	emit_dword(0x242c8948)
	return
|
||||
|
||||
; emit_mov_rbp_qword_rsp(): emit mov rbp, qword [rsp]
; encoding: 48 8b 2c 24
function emit_mov_rbp_qword_rsp
	; all four bytes go out as one little-endian dword
	emit_dword(0x242c8b48)
	return
|
||||
|
||||
; emit_add_rsp_imm32(imm32): emit add rsp, IMM32
; encoding: 48 81 c4 IMM32 (7 bytes in total)
function emit_add_rsp_imm32
	argument imm32
	; 48 81, written as one little-endian word
	emit_word(0x8148)
	emit_byte(0xc4)
	; the 4-byte immediate
	emit_dword(imm32)
	return
|
||||
|
||||
; emit_ret(): emit ret
; encoding: c3
function emit_ret
	emit_byte(0xc3)
	return
|
||||
|
||||
; generate_return(): emit the function epilogue followed by ret.
; the emitted code undoes the prologue written by generate_function.
; make sure you put the return value in the proper place before calling this
function generate_return
	; mov rsp, rbp        -- rsp now points at the saved rbp
	emit_mov_reg(REG_RSP, REG_RBP)
	; mov rbp, [rsp]      -- restore the caller's rbp
	emit_mov_rbp_qword_rsp()
	; add rsp, 8          -- drop the saved-rbp slot
	emit_add_rsp_imm32(8)
	; ret
	emit_ret()
	return
|
||||
|
||||
; generate_statement(statement): emit the code for one statement.
; not implemented yet: this currently emits nothing and ignores `statement`.
function generate_statement
	argument statement
	; @TODO
	return
|
||||
|
||||
; generate_function(function_name, function_statement): emit the code for one function.
;   function_name      - name of the function; used as the key into functions_labels
;   function_statement - passed on unchanged to generate_statement as the function's body
; also sets curr_function_labels to this function's label list:
; created and registered on the first pass, looked up again on the second pass.
function generate_function
	argument function_name
	argument function_statement

	if codegen_second_pass != 0 goto genf_second_pass
	; first pass: make a new label list and file it under this function's name
	curr_function_labels = ident_list_create(4000) ; ~ 200 labels per function should be plenty
	ident_list_add(functions_labels, function_name, curr_function_labels)
	goto genf_cont
	:genf_second_pass
	; second pass: reuse the label list that was filled in on the first pass
	curr_function_labels = ident_list_lookup(functions_labels, function_name)
	:genf_cont

	; prologue: sub rsp, 8 / mov [rsp], rbp / mov rbp, rsp
	; afterwards [rbp] holds the caller's rbp and [rbp+8] holds the return address
	emit_sub_rsp_imm32(8)
	emit_mov_qword_rsp_rbp()
	emit_mov_reg(REG_RBP, REG_RSP)

	generate_statement(function_statement)

	; implicit return at end of function
	generate_return()

	return
|
||||
|
||||
; generate_functions(): generate code for every function in function_statements.
; the list is walked entry by entry: a name, its 0 terminator, then 8 bytes that are skipped over;
; a 0 byte where a name would start ends the walk.
; first pass:  the address of each function is recorded in functions_addresses.
; second pass: the address is compared with the recorded one; on a mismatch an error
;              is written to file descriptor 2 and the program exits with status 1.
function generate_functions
	local addr
	local prev_addr
	local stmt
	local function_name

	function_name = function_statements

	:genfunctions_loop
	if *1function_name == 0 goto genfunctions_loop_end
	addr = code_output - output_file_data ; address of this function
	if codegen_second_pass != 0 goto genfs_check_addr
	; first pass; record address of function
	ident_list_add(functions_addresses, function_name, addr)
	goto genfs_cont
	:genfs_check_addr
	; second pass; the function must land where the first pass put it
	prev_addr = ident_list_lookup(functions_addresses, function_name)
	if prev_addr != addr goto function_addr_mismatch
	:genfs_cont
	; step over the name and its terminator
	stmt = memchr(function_name, 0)
	stmt += 1
	; NOTE(review): this passes the address of the 8 bytes following the name,
	; not the value stored there -- confirm which one generate_function should receive
	generate_function(function_name, stmt)
	; the next entry begins after those 8 bytes
	function_name = stmt + 8
	goto genfunctions_loop
	:genfunctions_loop_end
	return

	:function_addr_mismatch
	; address of function on 2nd pass doesn't line up with 1st pass
	fputs(2, .str_function_addr_mismatch)
	fputs(2, function_name)
	exit(1)
	:str_function_addr_mismatch
	string Function address on first pass doesn't match 2nd pass:
	byte 32
	byte 0
|
||||
|
||||
; generate_code(): run both code generation passes over all functions.
; each pass starts writing at output_file_data + FUNCTIONS_ADDR, so the second pass
; writes over the output of the first; codegen_second_pass tells the
; generate_* routines which pass is in progress.
function generate_code
	; first pass
	code_output = output_file_data + FUNCTIONS_ADDR
	codegen_second_pass = 0
	generate_functions()

	; second pass, from the same starting position
	code_output = output_file_data + FUNCTIONS_ADDR
	codegen_second_pass = 1
	generate_functions()

	; generate code at the entry point of the executable
	; @TODO
	return
|
|
@ -1,10 +1,13 @@
|
|||
; this is the format of the executables we produce:
|
||||
; elf header 4MB addresses 0x000000-0x400000 (no, it won't actually take up that much space)
|
||||
; code 4MB addresses 0x400000-0x7fffff
|
||||
; elf header 2MB addresses 0x000000-0x200000 (no, it won't actually take up that much space)
|
||||
; entry point 2MB addresses 0x200000-0x3fffff this is where we put the code to call main(), etc. (again, it won't actually take up that much space)
|
||||
; code (functions) 4MB addresses 0x400000-0x7fffff
|
||||
; read-only data 4MB addresses 0x800000-0xbfffff
|
||||
; read-write data 4MB addresses 0xc00000-0xffffff
|
||||
; note that file offsets and runtime addresses are the same.
|
||||
; you should be able to change these constants without breaking anything:
|
||||
#define ENTRY_ADDR 0x200000
|
||||
#define FUNCTIONS_ADDR 0x400000
|
||||
#define RODATA_ADDR 0x800000
|
||||
#define RWDATA_ADDR 0xc00000
|
||||
#define RWDATA_END 0x1000000
|
||||
|
|
|
@ -81,6 +81,7 @@ global function_param_has_no_name
|
|||
#include preprocess.b
|
||||
#include tokenize.b
|
||||
#include parse.b
|
||||
#include codegen.b
|
||||
|
||||
function types_init
|
||||
argument _types
|
||||
|
@ -235,6 +236,8 @@ function main
|
|||
structure_locations = ident_list_create(2000000)
|
||||
global_variables = ident_list_create(400000)
|
||||
function_statements = ident_list_create(800000)
|
||||
functions_addresses = ident_list_create(800000)
|
||||
functions_labels = ident_list_create(800000)
|
||||
function_types = ident_list_create(800000)
|
||||
function_stmt_data = malloc(800000) ; should be at least 40 bytes * max # of functions
|
||||
|
||||
|
@ -285,6 +288,7 @@ function main
|
|||
; NOTE: do NOT free pptokens; identifiers still reference them.
|
||||
|
||||
parse_tokens(tokens)
|
||||
generate_code()
|
||||
|
||||
p = output_file_data + RODATA_ADDR
|
||||
munmap(output_file_data, RWDATA_END)
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
#include "tests/parse_stb_truetype.h"
|
||||
|
||||
/*
|
||||
; @NONSTANDARD:
|
||||
; the following does not work:
|
||||
|
@ -14,3 +12,6 @@ This needs to be fixed because otherwise you can't do:
|
|||
struct A { struct B *blah; }
|
||||
struct B { struct A *blah; }
|
||||
*/
|
||||
|
||||
int main(void) {
|
||||
}
|
||||
|
|
|
@ -105,6 +105,9 @@ In the table below, `IMM64` means a 64-bit *immediate* (a constant number).
|
|||
`rdx:rax` refers to the 128-bit number you get by combining `rdx` and `rax`.
|
||||
|
||||
```
|
||||
ax bx cx dx sp bp si di
|
||||
0 3 1 2 4 5 6 7
|
||||
|
||||
┌──────────────────────┬───────────────────┬────────────────────────────────────────┐
|
||||
│ Instruction │ Encoding │ Description │
|
||||
├──────────────────────┼───────────────────┼────────────────────────────────────────┤
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue